Loading packages
#===============================================================================
#BTC.LineZero.Header.1.1.0
#===============================================================================
#R Markdown environment setup and reporting utility.
#===============================================================================
#RLB.Dependencies:
# knitr, magrittr, pacman, rio, rmarkdown, rmdformats, tibble, yaml
#===============================================================================
#Input for document parameters, libraries, file paths, and options.
#=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=
knitr::opts_chunk$set(message=FALSE, warning = FALSE)
path_working <- "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git/"
path_library <- "/Library/Frameworks/R.framework/Resources/library"
str_libraries <- c(
"readxl", "phyloseq", "tidyverse", "pacman", "yaml"
)
path_working <- "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git"
path_library <- "/Library/Frameworks/R.framework/Resources/library"
str_libraries <- c("readxl", "phyloseq", "tidyverse", "pacman", "yaml", "ggplot2", "vegan", "microbiome", "ggpubr", "viridis", "decontam", "gridExtra", "ggpubr", "lme4", "lmerTest", "writexl", "harrietr", "Maaslin2", "ggtext", "ggpmisc", "gridExtra", "gamm4", "reshape2", "kableExtra", "knitr")
YAML_header <-
'---
title: "Host-DNA depletion 1: data wrangling"
author: "Minsik Kim"
date: "2032.03.10"
output:
rmdformats::downcute:
downcute_theme: "chaos"
code_folding: hide
fig_width: 6
fig_height: 6
---'
seed <- "20230314"
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Loads libraries, file paths, and other document options.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
FUN.LineZero.Boot <- function() {
.libPaths(path_library)
require(pacman)
pacman::p_load(c("knitr", "rmarkdown", "rmdformats", "yaml"))
knitr::opts_knit$set(root.dir = path_working)
str_libraries |> unique() |> sort() -> str_libraries
pacman::p_load(char = str_libraries)
set.seed(seed)
}
FUN.LineZero.Boot()
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Outputs R environment report.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
FUN.LineZero.Report <- function() {
cat("Line Zero Environment:\n\n")
paste("R:", pacman::p_version(), "\n") |> cat()
cat("Libraries:\n")
for (str_libraries in str_libraries) {
paste(
" ", str_libraries, ": ", pacman::p_version(package = str_libraries),
"\n", sep = ""
) |> cat()
}
paste("\nOperating System:", pacman::p_detectOS(), "\n") |> cat()
paste(" Library Path:", path_library, "\n") |> cat()
paste(" Working Path:", path_working, "\n") |> cat()
paste("Seed:", seed, "\n\n") |> cat()
cat("YAML Header:\n")
cat(YAML_header)
}
FUN.LineZero.Report()
## Line Zero Environment:
##
## R: 4.2.2
## Libraries:
## readxl: 1.4.2
## phyloseq: 1.40.0
## tidyverse: 2.0.0
## pacman: 0.5.1
## yaml: 2.3.7
## ggplot2: 3.4.1
## vegan: 2.6.4
## microbiome: 1.18.0
## ggpubr: 0.6.0
## viridis: 0.6.2
## decontam: 1.16.0
## gridExtra: 2.3
## ggpubr: 0.6.0
## lme4: 1.1.31
## lmerTest: 3.1.3
## writexl: 1.4.2
## harrietr: 0.2.3
## Maaslin2: 1.10.0
## ggtext: 0.1.2
## ggpmisc: 0.5.2
## gridExtra: 2.3
## gamm4: 0.2.6
## reshape2: 1.4.4
## kableExtra: 1.3.4
## knitr: 1.42
##
## Operating System: Darwin
## Library Path: /Library/Frameworks/R.framework/Resources/library
## Working Path: /Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git
## Seed: 20230314
##
## YAML Header:
## ---
## title: "Host-DNA depletion 1: data wrangling"
## author: "Minsik Kim"
## date: "2032.03.10"
## output:
## rmdformats::downcute:
## downcute_theme: "chaos"
## code_folding: hide
## fig_width: 6
## fig_height: 6
## ---
#Script description#
*1. Loading data 1.1. phyloseq obejct 1.2. qPCR data (controls)
QC Q1. How many samples failed sequencing Q2. How were changes in read stats and host DNA proportion? Q3. How were the extraction controls Q4. Prevalence / abundance filtering - red flag
Exploratory analysis *
data input required:
Meta data
qPCR - bacteria
qPCR - human
qPCR host %
Raw reads
final reads
sequencing host %
library prep failure status
Raw reads
subject_id
treatment
sample_type
subject_id
Sequencing result
samples
controls
Analysis
calculation of alpha-diversity indices
qPCR, by smaple type and treatment A. Total DNA B. Host DNA C. Bacterial DNA D. Host %
Sequencing results (treatment, raw reads, host reads, % host, final reads) –> both stratified and nonstratified library prep % vs sample_type + treatment + sample_type * treatment + subject_id (binary) log10(Final reads) vs sample_type + treatment + sample_type * treatment + subject_id (linear or binary, see regular and cumulative distribution) Host DNA % vs sample_type + treatment + sample_type * treatment + subject_id (linear or binary, see regular and cumulative distribution) –> both stratified and nonstratified
LM of taxa alpha diversity (richness) –> both stratified and nonstratified
Permanova (Taxa dist ~ log10(final reads) + sample_type + treatment + sample_type * treatment + subject_id) –> both stratified and nonstratified
DA analysis for taxa, by sample type and treatment –> both stratified and nonstratified –> look at level groups as well - phylum, etc.
Decontam - stratified by treatment –> both stratified and nonstratified
LM of taxa alpha diversity (Shannon) –> both stratified and nonstratified
LM of taxa alpha diversity (InvSimpson) –> both stratified and nonstratified
LM of taxa alpha diversity (BPI) –> both stratified and nonstratified
LM of function alpha diversity (richness) –> both stratified and nonstratified
LM of function alpha diversity (Shannon) –> both stratified and nonstratified
LM of function alpha diversity (InvSimpson) –> both stratified and nonstratified
LM of function alpha diversity (BPI) –> both stratified and nonstratified
permanova of function alpha diversity –> both stratified and nonstratified
#Loading data#
# Loading files -----------------------------------------------------------
#loading tidy phyloseq object
phyloseq <- read_rds("/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/4_Data/2_Tidy/Phyloseq/PHY_20221129_MGK_host_tidy_tax.rds")
####Need to be removed after adding sequenicing data
data_qPCR <- read_csv("/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Tables/DAT_20230122_MGK_host_control_qPCR.csv")
#qPCR data of all the samples sent out sequencing
data_qPCR <- subset(data_qPCR, data_qPCR$baylor_other_id %in% c(sample_names(phyloseq$phyloseq_count)) | data_qPCR$baylor_other_id %in% c(read_excel("/Users/minsikkim/Dropbox (Partners HealthCare)/Project_Baylor/3_Documentation/Communications/2023-01-24_baylor_shipping_sicas2_nasal_host_depleted/CMMR_MetadataCapture_20230124_LaiP-PQ00430_SICAS2_NS.xlsx", skip = 27) %>% data.frame %>% .$`Optional..............secondary.ID`))
data_qPCR <- subset(data_qPCR, data_qPCR$sample_type %in% c("BAL", "nasal_swab", "Sputum", "neg_depletion", "pos_depletion"))
data_qPCR$sample_type <- factor(data_qPCR$sample_type, levels = c("BAL", "nasal_swab", "Sputum", "pos_depletion", "neg_depletion"),
labels = c("BAL", "Nasal swab", "Sputum", "P depletion", "N depletion"))
data_qPCR$treatment <- factor(data_qPCR$treatment, levels = c("control", "lyPMA", "benzonase", "host_zero", "molysis", "qiaamp"),
labels = c("Control", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"))
#sample data loading
sample_data <- sample_data(phyloseq$phyloseq_count)
- QC Q1. How many samples failed sequencing Q2. How were changes in read stats and host DNA proportion? Q3. How were the extraction controls Q4. Decontam - were there any contaminants?
Q1. How many samples failed in sequencing?
Figure - regular scale
# Initail QC --------------------------------------------------------------
#Quesetions - QC
#Q0. How many samples failed in sequencing
## figures -----raw data
sample_data %>%
subset(., !is.na(.$subject_id)) %>%
data.frame() %>%
mutate(host_seq_percent = sequencing_host_prop * 100,
.after = sequencing_host_prop,) %>%
gather(feature, value, Raw_reads:host_seq_percent) %>%
group_by(feature, sample_type) %>%
subset(., .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent")) %>%
mutate(feature = factor(feature, levels = c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent"), labels = c("Raw reads", "Host mapped", "Final reads", "Host %"))) %>%
ggplot(aes(x = value, fill = treatment)) +
geom_histogram(bins = 97) +
theme(legend.position = "top") +
guides(fill=guide_legend(title="Treatment", nrow = 1)) +
facet_grid(sample_type~feature, scales = "free") +
ggtitle("log10 transfromed histrogram")
Figure - log10 scale
## figures -----log10
sample_data %>%
subset(., !is.na(.$subject_id)) %>%
data.frame() %>%
mutate(host_seq_percent = sqrt(sequencing_host_prop),
.after = sequencing_host_prop,) %>%
gather(feature, value, Raw_reads:host_seq_percent) %>%
group_by(feature, sample_type) %>%
subset(., .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent")) %>%
mutate(feature = factor(feature, levels = c("Raw_reads", "Host_mapped", "Final_reads", "host_seq_percent"), labels = c("Raw reads", "Host mapped", "Final reads", "Host %"))) %>%
ggplot(aes(x = log10(value), fill = treatment)) +
geom_histogram(bins = 97) +
theme(legend.position = "top") +
guides(fill=guide_legend(title="Treatment", nrow = 1)) +
facet_grid(sample_type~feature, scales = "free") +
ggtitle("log10 for reads, sqrt for host % transfromed histrogram")
No somaple showed 0 reads
Figure - log10 scale
ggarrange(ggplot(sample_data %>% subset(., !is.na(.$subject_id)) %>% data.frame(), aes(x = Final_reads, fill = treatment)) +
geom_histogram(bins = 97) +
facet_wrap(~sample_type) +
theme_classic(base_family = "serif") +
ggtitle("Histogram of final reads by sample type and treatment") +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment"),
ggplot(sample_data %>% subset(., !is.na(.$subject_id)) %>% data.frame(), aes(x = log10(Final_reads), fill = treatment)) +
geom_histogram(bins = 97) +
facet_wrap(~sample_type) +
theme_classic(base_family = "serif") +
ggtitle("Histogram of log10(final reads) by sample type and treatment") +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment"),
common.legend = T, ncol = 1)
Histogram of total reads (sum of OTU table)
hist((log10((phyloseq$phyloseq_count %>% otu_table %>% colSums()) + 1)),100, main = "Histogram of total reads (sum of OTU table)") # 2 samples showed 0 total reads (sum of otu_table)
#how were the samples failed in library prep?
sample_data %>% data.frame %>% mutate(total_read = phyloseq$phyloseq_count %>% otu_table %>% colSums()) %>% rownames_to_column(var = "baylor_other_id") %>%
ggplot(aes(x = reorder(baylor_other_id, -total_read),
y = log10(total_read + 1),
col = lib_failed)) +
geom_point() +
theme_classic(base_family = "serif") +
theme(axis.title.y = element_markdown(), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 4)) +
ylab("log<sub>10</sub>(Sum of OTU table reads)") +
xlab("Sample ID") +
guides(col=guide_legend(title="Library failed")) +
ggtitle("Sum of OTU reads by library failure status")
Samples failed in sequencing
sample_data %>% data.frame %>% filter(phyloseq$phyloseq_count %>% otu_table %>% colSums() == 0) # two BAL sampels showed 0 total reads
sample_data(phyloseq$phyloseq_count) %>% data.frame() %>% subset(., .$lib_failed)
Results:
2.1.1 Modeling final read should be conducted with both linear and logistic regression. Host % need no transformation, and should be tested for both linear and binomial methods
2.1.2 13 samples failed in library prep
2.1.3. Two BAL sampels showed 0 total reads
2.1.4. Sequencing fail ≠ library prep fail
Comments from Baylor:
Q: What was the lab’s criteria for deciding which samples failed library prep.? There were 13 samples that you pointed as failed but their sequencing result actually looks pretty good (ie similar to samples that didn’t fail library prep)
A: To determine whether a library attempt “passed or failed” the lab looks at the picogreen concentrations and a library fragment size distribution trace. The trace files are an output from either the Fragment Analyzer or TapeStation (a copy of the trace files for PQ00331 is attached). If a sample has a background level pico concentration and no discernable fragment concentration on the trace (i.e. a flat trace line) it is considered failed library. If the sample is below the level of detection of our standard library QC methods, it is considered failure. It’s still possible that there is some small amounts of library in those samples that were successfully sequenced, but often those samples do not generate a meaningful amount of sequence data.
QC2 table
Results
QC table by treated (binary)
#sequencing result by sample type and control (1/0)
options(dplyr.summarise.inform = FALSE)
sample_data %>% data.frame() %>%
group_by(sample_type, treated) %>%
summarise(N = n(),
`Raw reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Raw_reads/10000000),2),nsmall = 2, big.mark = ","), " [", format(round(quantile(Raw_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Raw_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Host_mapped/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Host_mapped/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Host_mapped/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads proportion<br>(median [IQR])<br>[%]` = paste(format(round(median(sequencing_host_prop * 100),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(sequencing_host_prop * 100, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(sequencing_host_prop * 100, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Final reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Final_reads/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Final_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Final_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
) %>%
rename(`Sample type` = sample_type, Treated = treated) %>%
data.frame(check.names = F) %>% mutate_all(linebreak) %>% kbl(format = "html", escape = F) %>% kable_styling(full_width = 0, html_font = "serif")
| Sample type | Treated | N |
Raw reads (median [IQR]) [reads x 107] |
Host reads (median [IQR]) [reads x 107] |
Host reads proportion (median [IQR]) [%] |
Final reads (median [IQR]) [reads x 107] |
|---|---|---|---|---|---|---|
| BAL | 0 | 5 | 15.73 [6.35, 15.92] | 12.92 [5.21, 12.94] | 82.14 [82.12, 82.18] | 0.03 [0.03, 0.04] |
| BAL | 1 | 25 | 6.17 [4.57, 17.43] | 4.65 [2.78, 12.80] | 75.59 [69.38, 78.44] | 0.17 [0.10, 0.37] |
| Nasal swab | 0 | 10 | 13.09 [7.73, 16.93] | 10.05 [6.11, 13.04] | 77.34 [76.89, 79.45] | 0.48 [0.10, 0.87] |
| Nasal swab | 1 | 25 | 4.08 [0.99, 6.40] | 0.81 [0.26, 1.36] | 27.46 [13.51, 64.58] | 0.97 [0.17, 3.42] |
| Sputum | 0 | 5 | 8.59 [8.25, 9.27] | 6.87 [6.69, 7.50] | 80.61 [79.92, 80.89] | 0.06 [0.06, 0.09] |
| Sputum | 1 | 25 | 12.23 [10.34, 13.73] | 7.71 [3.76, 8.82] | 58.64 [39.71, 74.93] | 1.16 [0.47, 4.19] |
| Neg. ext. | 0 | 1 | 0.02 [0.02, 0.02] | 0.00 [0.00, 0.00] | 3.13 [3.13, 3.13] | 0.02 [0.02, 0.02] |
| Pos. ext. | 0 | 1 | 11.87 [11.87, 11.87] | 0.00 [0.00, 0.00] | 0.02 [0.02, 0.02] | 9.96 [9.96, 9.96] |
QC table by treatment methods
sample_data %>% data.frame() %>%
#dplyr::filter(sample_type %in% c("Sputum", "nasal_swab", "BAL")) %>%
group_by (sample_type, treatment) %>%
summarise(N = n(),
`Raw reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Raw_reads/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Raw_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Raw_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Host_mapped/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Host_mapped/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Host_mapped/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Host reads proportion<br>(median [IQR])<br>[%]` = paste(format(round(median(sequencing_host_prop * 100),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(sequencing_host_prop * 100, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(sequencing_host_prop * 100, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
`Final reads<br>(median [IQR])<br>[reads x 10<sup>7</sup>]` = paste(format(round(median(Final_reads/10000000),2), nsmall = 2, big.mark = ","), " [", format(round(quantile(Final_reads/10000000, 0.25),2), nsmall = 2, big.mark = ","), ", ", format(round(quantile(Final_reads/10000000, 0.75),2), nsmall = 2, big.mark = ","), "]", sep = ""),
) %>% data.frame(check.names = F) %>%
arrange(sample_type, treatment) %>%
rename(`Sample type` = sample_type, Treatment = treatment) %>%
mutate_all(linebreak) %>% kbl(format = "html", escape = F) %>% kable_styling(full_width = 0, html_font = "serif")
| Sample type | Treatment | N |
Raw reads (median [IQR]) [reads x 107] |
Host reads (median [IQR]) [reads x 107] |
Host reads proportion (median [IQR]) [%] |
Final reads (median [IQR]) [reads x 107] |
|---|---|---|---|---|---|---|
| BAL | Control | 5 | 15.73 [6.35, 15.92] | 12.92 [5.21, 12.94] | 82.14 [82.12, 82.18] | 0.03 [0.03, 0.04] |
| BAL | lyPMA | 5 | 5.72 [3.59, 13.41] | 4.65 [2.79, 10.90] | 80.73 [77.85, 81.18] | 0.06 [0.04, 0.10] |
| BAL | Benzonase | 5 | 18.59 [16.20, 23.63] | 14.77 [12.80, 18.16] | 79.02 [77.89, 79.45] | 0.17 [0.16, 0.22] |
| BAL | Host zero | 5 | 4.57 [2.32, 4.71] | 2.69 [1.61, 2.93] | 66.98 [57.17, 68.95] | 0.24 [0.13, 0.82] |
| BAL | Molysis | 5 | 4.76 [3.57, 4.86] | 2.78 [1.39, 3.61] | 74.39 [74.29, 75.59] | 0.29 [0.13, 1.56] |
| BAL | QIAamp | 5 | 17.19 [15.35, 17.43] | 11.87 [10.79, 12.22] | 74.88 [71.08, 77.31] | 0.26 [0.10, 1.02] |
| Nasal swab | Control | 10 | 13.09 [7.73, 16.93] | 10.05 [6.11, 13.04] | 77.34 [76.89, 79.45] | 0.48 [0.10, 0.87] |
| Nasal swab | lyPMA | 5 | 0.98 [0.85, 1.24] | 0.63 [0.28, 0.88] | 71.37 [28.73, 74.68] | 0.07 [0.06, 0.08] |
| Nasal swab | Benzonase | 5 | 5.75 [4.95, 6.57] | 3.66 [1.29, 5.05] | 64.58 [63.71, 76.83] | 0.28 [0.26, 1.04] |
| Nasal swab | Host zero | 5 | 2.83 [1.42, 6.42] | 0.49 [0.03, 0.81] | 7.67 [2.33, 25.73] | 2.43 [0.97, 5.03] |
| Nasal swab | Molysis | 5 | 0.99 [0.63, 4.08] | 0.42 [0.06, 0.64] | 41.04 [4.31, 64.24] | 0.32 [0.17, 2.53] |
| Nasal swab | QIAamp | 5 | 6.40 [6.40, 6.80] | 0.86 [0.86, 1.17] | 17.24 [13.51, 19.57] | 4.63 [4.50, 4.67] |
| Sputum | Control | 5 | 8.59 [8.25, 9.27] | 6.87 [6.69, 7.50] | 80.61 [79.92, 80.89] | 0.06 [0.06, 0.09] |
| Sputum | lyPMA | 5 | 10.98 [5.22, 12.78] | 8.82 [3.76, 10.44] | 76.33 [72.02, 80.29] | 0.25 [0.15, 0.44] |
| Sputum | Benzonase | 5 | 10.76 [10.34, 10.82] | 7.81 [7.75, 8.24] | 74.93 [72.13, 75.33] | 0.47 [0.45, 0.59] |
| Sputum | Host zero | 5 | 13.14 [7.64, 13.95] | 4.39 [3.80, 7.71] | 49.71 [31.45, 55.49] | 2.91 [2.36, 3.67] |
| Sputum | Molysis | 5 | 12.59 [10.84, 13.73] | 2.98 [1.83, 4.28] | 27.49 [14.62, 28.19] | 6.11 [5.56, 8.37] |
| Sputum | QIAamp | 5 | 12.35 [12.23, 12.85] | 9.08 [8.41, 9.27] | 71.76 [56.69, 73.50] | 1.16 [1.13, 3.89] |
| Neg. ext. | Control | 1 | 0.02 [0.02, 0.02] | 0.00 [0.00, 0.00] | 3.13 [3.13, 3.13] | 0.02 [0.02, 0.02] |
| Pos. ext. | Control | 1 | 11.87 [11.87, 11.87] | 0.00 [0.00, 0.00] | 0.02 [0.02, 0.02] | 9.96 [9.96, 9.96] |
QC2 figure
# Summary figures - facet and z-score -------------------------------------
sample_data %>%
subset(., !is.na(.$subject_id)) %>%
data.frame() %>%
gather(feature, value, Raw_reads:sequencing_host_prop) %>%
group_by(feature, sample_type) %>%
subset(., .$feature %in% c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop")) %>%
mutate(z_score = scale(value),
feature = factor(feature, levels = c("Raw_reads", "Host_mapped", "Final_reads", "sequencing_host_prop"), labels = c("Raw reads", "Host mapped", "Final reads", "Host %"))) %>%
ggplot(aes(x = treatment, y = z_score, fill = treatment)) +
geom_boxplot(lwd = 0.2) +
guides(fill=guide_legend(title="Treatment", nrow = 1)) +
facet_grid(sample_type~feature) +
xlab("Treatment") +
ylab("Z score") +
theme_classic(base_family = "serif", base_size = 14) +
guides( x = guide_axis(angle = 90)) +
theme(legend.position = "top") +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33"), name = "Treatment") #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
Results:
2.2.1. There were no differences between raw reads, however, final reads increased after some treatment, and host DNA proportion decreased
2.2.2. The changes in final reads were varied by treatment and sample type
comment: Should I normalize z-score to mean value of control?
Q3. How was the extraction controls, both pos and neg?
#Loading theoretical mock community
zymo_mock <- read_excel("/Users/minsikkim/Dropbox (Partners HealthCare)/@minsik/project_sicas2/data_raw/DAR_20210929_zymo_mock_data.xlsx") %>%
data.frame(row.names = T) %>% rename(mock_theoretical = Mock) %>% mutate(mock_theoretical = mock_theoretical/100) %>%
merge_phyloseq(otu_table(., taxa_are_rows = T), tax_table(phyloseq$phyloseq_count))
phyloseq_control_rel <- rbind(c("mock_theoretical", "mock")) %>% data.frame() %>%
column_to_rownames(var = "X1") %>% rename(sample_type = X2) %>% #making sample_data of mock community
merge_phyloseq(sample_data(.), zymo_mock) %>%
merge_phyloseq(., subset_samples(phyloseq$phyloseq_rel, sample_type == "Pos. ext." | sample_type == "Neg. ext." )) #adding data of controls
#Species richness of each control groups
cat("Species richness of controls\n")
## Species richness of controls
rowSums(t(otu_table(phyloseq_control_rel)) != 0)
## mock_theoretical 20220606_Pos 20220606_Neg
## 10 41 3
sample_data(phyloseq_control_rel)$sample_type <- factor(sample_data(phyloseq_control_rel)$sample_type,
levels = c("mock", "2", "1"),
labels = c("Mock, 10 spp.", "Pos., 41 spp.", "Neg., 3 spp."))
#Manipulating phyloseq - only top 10
tax_table(phyloseq_control_rel) %>%
cbind(species10 = "[Others]") %>%
{top10species <- head(taxa_sums(phyloseq_control_rel), 10) %>% names()
.[top10species, "species10"] <- as.character(.[top10species, "Species"])
.[, 8] <- .[, 8] %>% gsub("s__", "", .) %>% gsub("_", " ", .) %>% paste("<i>", ., "</i>", sep = "")
phyloseq_temp <- phyloseq_control_rel
tax_table(phyloseq_temp) <- tax_table(.)
phyloseq_temp
} %>%
plot_bar(., fill="species10") +
ylab("Relative abundancne") +
theme_classic(base_size = 11, base_family = "serif") +
ggtitle("Species richness of control data") +
theme(legend.text = element_markdown()) +
guides(fill=guide_legend(title="Top 10 species")) +
facet_wrap (~ sample_type, scales= "free_x", nrow=1)
#there could be opportunistic pathogens...
Results
2.3.1. Negative control showed minimal number of possible contaminants
2.3.2. Positive control contained various contaminants
Results:
2.2.1. There were no differences between raw reads, however, final reads increased after some treatment, and host DNA proportion decreased
2.2.2. The changes in final reads were varied by treatment and sample type
Q4. Prevalence filtering - red flag
Results
Prelanence taxa - histogram
#Calculation of sample prevalence, standard deviation, median abundance across all samples for all bugs and making into a table.
#
#• In initial analysis we will not perform prevalence or abundance filtering (though we may consider this for secondary differential abundance analyses to manage p (features) > n (sample size) problem and issues with multiple hypothesis correction)
taxa_qc <- data.frame("species" = otu_table(phyloseq$phyloseq_rel) %>% t() %>% colnames(),
"prevalence" = ifelse(phyloseq$phyloseq_count %>% otu_table() > 0, 1, 0) %>% t() %>% colSums(), #Prevalence of taxa
"mean_rel_abd" = phyloseq$phyloseq_rel %>% otu_table() %>% t() %>% colMeans(na.rm = T) #mean relativ abundacne
)
hist(log10(taxa_qc$prevalence), xlab = "log10(Taxa prevalence)", main = "Histogram of prevalence of taxa")
### Mean abundance taxa - histogram
hist(log10(taxa_qc$mean_rel_abd), xlab = "log10(Mean relative abundance)", main = "Histogram of mean relative abundance")
Results:
• In initial analysis we will not perform prevalence or abundance filtering (though we may consider this for secondary differentialabundance analyses to manage p (features) > n (sample size) problem and issues with multiple hypothesis correction)
• Calculation of sample prevalence, standard deviation, median abundance across all samples for all bugs and making into a table.
Although we don’t consider the prevalence of abundance at this time, we can consider their red-flags after running the DA analysis
- red flag
red_flag_taxa <- data.frame(species = taxa_qc$species,
red_flag_prev_abd = ifelse(taxa_qc$prevalence < otu_table(phyloseq$phyloseq_rel) %>% t %>% rownames() %>% length * 0.05 & taxa_qc$mean_rel_abd < quantile(taxa_qc$mean_rel_abd, 0.75), 1, 0)
)
#Analysis#
A0. calculation of alpha-diversity indices
alpha_diversity <- function(data) {
otu_table <- otu_table(data) %>% .[colSums(.) !=0]
S.obs <- rowSums(t(otu_table) != 0)
sample_data <- sample_data(data)
data_evenness <- vegan::diversity(t(otu_table)) / log(vegan::specnumber(t(otu_table))) # calculate evenness index using vegan package
data_shannon <- vegan::diversity(t(otu_table), index = "shannon") # calculate Shannon index using vegan package
data_hill <- exp(data_shannon) # calculate Hills index
data_dominance <- microbiome::dominance(otu_table, index = "all", rank = 1, aggregate = TRUE) # dominance (Berger-Parker index), etc.
data_invsimpson <- vegan::diversity(t(otu_table), index = "invsimpson") # calculate Shannon index using vegan package
alpha_diversity <- cbind(S.obs, data_shannon, data_hill, data_invsimpson, data_evenness,data_dominance) # combine all indices in one data table
sample_data <- merge(data.frame(sample_data), alpha_diversity, by = 0, all = T) %>% column_to_rownames(var = "Row.names")
}
sample_data(phyloseq$phyloseq_rel) <- sample_data(alpha_diversity(phyloseq$phyloseq_count))
sample_data(phyloseq$phyloseq_count) <- sample_data(alpha_diversity(phyloseq$phyloseq_count))
sample_data(phyloseq$phyloseq_path_rpkm) <- sample_data(alpha_diversity(phyloseq$phyloseq_path_rpkm))
A1. qPCR, by smaple type and treatment
A. Total DNA
B. Host DNA
C. Bacterial DNA
D. Host %
#2A: Change in total DNA (qPCR)
f2a <- ggplot(data_qPCR, aes(x = sample_type, y = log10(DNA_host_nondil + DNA_bac_nondil))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
ylab("log<sub>10</sub>(qPCR total DNA)<br>(ng/μL)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "A") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#2B: Change in human DNA (qPCR)
f2b <- ggplot(data_qPCR, aes(x = sample_type, y = log10(DNA_host_nondil))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
ylab("log<sub>10</sub>(qPCR host DNA)<br>(ng/μL)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif")+
labs(tag = "B") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#2C: Change in 16S DNA (qPCR)
f2c <- ggplot(data_qPCR, aes(x = sample_type, y = log10(DNA_bac_nondil))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
ylab("log<sub>10</sub>(qPCR bacterial DNA)<br>(ng/μL)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif")+
labs(tag = "C") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#2D. Change in % host (qPCR)
f2d <- ggplot(data_qPCR, aes(x = sample_type, y = log10(host_proportion * 100))) +
geom_boxplot(aes(fill = treatment), lwd = 0.2) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
ylab("log<sub>10</sub>(host DNA) (%)") +
xlab("Sample type") +
theme_classic (base_size = 12, base_family = "serif") +
labs(tag = "D") +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
theme(plot.tag = element_text(size = 15), axis.title.y = element_markdown()) + # Plot title size
guides(fill = guide_legend(nrow = 1, title = "Treatment"))
#output for markdown
ggarrange(f2a, f2b, f2c, f2d, common.legend = T , align = "hv")
A2. Sequencing results (treatment, raw reads, host reads, % host, final reads)
--> both stratified and nonstratified
library prep % vs sample_type + treatment + sample_type * treatment + subject_id (binary)
log10(Final reads) vs sample_type + treatment + sample_type * treatment + subject_id (linear or binary, see regular and cumulative distribution)
Host DNA % vs sample_type + treatment + sample_type * treatment + subject_id (linear or binary, see regular and cumulative distribution)
--> both stratified and nonstratified
Results
Library failure
cat("Binomial - library failed? \n\n")
## Binomial - library failed?
glmer(lib_failed ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
summary %>% .$coefficients %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`t value`) > 2 ~ "*", .default = " ")) %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | t value | ||
|---|---|---|---|---|
| (Intercept) | 0.0000000 | 0.1167011 | 0.0000000 | |
| sample_typeNasal swab | -0.0204626 | 0.1470919 | -0.1391145 | |
| sample_typeSputum | 0.0000000 | 0.1650403 | 0.0000000 | |
| treatmentlyPMA | 0.2000000 | 0.1619675 | 1.2348160 | |
| treatmentBenzonase | 0.0000000 | 0.1619675 | 0.0000000 | |
| treatmentHost zero | 0.2000000 | 0.1619675 | 1.2348160 | |
| treatmentMolysis | 0.2000000 | 0.1619675 | 1.2348160 | |
| treatmentQIAamp | 0.0000000 | 0.1619675 | 0.0000000 | |
| sample_typeNasal swab:treatmentlyPMA | 0.5978181 | 0.2143680 | 2.7887474 |
|
| sample_typeSputum:treatmentlyPMA | -0.2000000 | 0.2290566 | -0.8731468 | |
| sample_typeNasal swab:treatmentBenzonase | -0.0021819 | 0.2143680 | -0.0101781 | |
| sample_typeSputum:treatmentBenzonase | 0.0000000 | 0.2290566 | 0.0000000 | |
| sample_typeNasal swab:treatmentHost zero | 0.1978181 | 0.2143680 | 0.9227971 | |
| sample_typeSputum:treatmentHost zero | -0.2000000 | 0.2290566 | -0.8731468 | |
| sample_typeNasal swab:treatmentMolysis | 0.6021819 | 0.2143680 | 2.8091035 |
|
| sample_typeSputum:treatmentMolysis | -0.2000000 | 0.2290566 | -0.8731468 | |
| sample_typeNasal swab:treatmentQIAamp | 0.0021819 | 0.2143680 | 0.0101781 | |
| sample_typeSputum:treatmentQIAamp | 0.0000000 | 0.2290566 | 0.0000000 |
log10(Final reads)
cat("Linear - Final reads? \n\n")
## Linear - Final reads?
lmer(log10(Final_reads) ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
summary %>% .$coefficients %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*", .default = " ")) %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 5.4933063 | 0.2095210 | 62.79820 | 26.2184086 | 0.0000000 |
|
| sample_typeNasal swab | 1.1246781 | 0.2789651 | 37.26869 | 4.0316080 | 0.0002633 |
|
| sample_typeSputum | 0.3332900 | 0.2963074 | 62.79820 | 1.1248117 | 0.2649498 | |
| treatmentlyPMA | 0.3521385 | 0.2733402 | 67.65016 | 1.2882794 | 0.2020376 | |
| treatmentBenzonase | 0.8109049 | 0.2733402 | 67.65016 | 2.9666509 | 0.0041580 |
|
| treatmentHost zero | 0.9501904 | 0.2733402 | 67.65016 | 3.4762193 | 0.0008929 |
|
| treatmentMolysis | 1.0358039 | 0.2733402 | 67.65016 | 3.7894313 | 0.0003238 |
|
| treatmentQIAamp | 1.0480985 | 0.2733402 | 67.65016 | 3.8344107 | 0.0002788 |
|
| sample_typeNasal swab:treatmentlyPMA | -0.8774152 | 0.3621899 | 68.18953 | -2.4225278 | 0.0180749 |
|
| sample_typeSputum:treatmentlyPMA | 0.1879364 | 0.3865614 | 67.65016 | 0.4861747 | 0.6284145 | |
| sample_typeNasal swab:treatmentBenzonase | -0.6887274 | 0.3621899 | 68.18953 | -1.9015642 | 0.0614539 | |
| sample_typeSputum:treatmentBenzonase | 0.0349655 | 0.3865614 | 67.65016 | 0.0904526 | 0.9281949 | |
| sample_typeNasal swab:treatmentHost zero | -0.1157121 | 0.3621899 | 68.18953 | -0.3194791 | 0.7503399 | |
| sample_typeSputum:treatmentHost zero | 0.7231403 | 0.3865614 | 67.65016 | 1.8706997 | 0.0657128 | |
| sample_typeNasal swab:treatmentMolysis | -0.8411412 | 0.3621899 | 68.18953 | -2.3223760 | 0.0232052 |
|
| sample_typeSputum:treatmentMolysis | 0.9543008 | 0.3865614 | 67.65016 | 2.4686913 | 0.0160928 |
|
| sample_typeNasal swab:treatmentQIAamp | 0.0236242 | 0.3621899 | 68.18953 | 0.0652260 | 0.9481849 | |
| sample_typeSputum:treatmentQIAamp | 0.3676901 | 0.3865614 | 67.65016 | 0.9511817 | 0.3448982 |
Host %
cat("Linear - Host DNA proportion? \n\n")
## Linear - Host DNA proportion?
lmer(sequencing_host_prop ~ sample_type + treatment + sample_type * treatment + (1|subject_id), data = sample_data %>% data.frame) %>%
summary %>% .$coefficients %>% data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*", .default = " ")) %>%
kbl(format = "html") %>% kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.8200713 | 0.0646272 | 75.67093 | 12.6892671 | 0.0000000 |
|
| sample_typeNasal swab | -0.0378253 | 0.0799149 | 41.00023 | -0.4733196 | 0.6384957 | |
| sample_typeSputum | -0.0293956 | 0.0913966 | 75.67093 | -0.3216265 | 0.7486224 | |
| treatmentlyPMA | -0.0377605 | 0.0908962 | 68.23467 | -0.4154243 | 0.6791348 | |
| treatmentBenzonase | -0.0324363 | 0.0908962 | 68.23467 | -0.3568498 | 0.7223055 | |
| treatmentHost zero | -0.2000025 | 0.0908962 | 68.23467 | -2.2003393 | 0.0311715 |
|
| treatmentMolysis | -0.1577230 | 0.0908962 | 68.23467 | -1.7351990 | 0.0872202 | |
| treatmentQIAamp | -0.0928027 | 0.0908962 | 68.23467 | -1.0209737 | 0.3108732 | |
| sample_typeNasal swab:treatmentlyPMA | -0.1931415 | 0.1202628 | 68.80454 | -1.6059959 | 0.1128550 | |
| sample_typeSputum:treatmentlyPMA | 0.0106590 | 0.1285467 | 68.23467 | 0.0829196 | 0.9341582 | |
| sample_typeNasal swab:treatmentBenzonase | -0.1301969 | 0.1202628 | 68.80454 | -1.0826036 | 0.2827639 | |
| sample_typeSputum:treatmentBenzonase | -0.0432386 | 0.1285467 | 68.23467 | -0.3363648 | 0.7376281 | |
| sample_typeNasal swab:treatmentHost zero | -0.3924910 | 0.1202628 | 68.80454 | -3.2636117 | 0.0017148 |
|
| sample_typeSputum:treatmentHost zero | -0.1532698 | 0.1285467 | 68.23467 | -1.1923278 | 0.2372628 | |
| sample_typeNasal swab:treatmentMolysis | -0.2637315 | 0.1202628 | 68.80454 | -2.1929608 | 0.0316937 |
|
| sample_typeSputum:treatmentMolysis | -0.3863225 | 0.1285467 | 68.23467 | -3.0053090 | 0.0037094 |
|
| sample_typeNasal swab:treatmentQIAamp | -0.5240678 | 0.1202628 | 68.80454 | -4.3576894 | 0.0000449 |
|
| sample_typeSputum:treatmentQIAamp | -0.0370057 | 0.1285467 | 68.23467 | -0.2878775 | 0.7743131 |
A3. LM of taxa alpha diversity
--> both stratified and nonstratified
Results
Species richness
sample_data <- sample_data(phyloseq$phyloseq_count) %>% data.frame(check.names = F) %>% subset(., !is.nan(.$simpson))
cat("Species richness of all samples\n\n")
## Species richness of all samples
cat("S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_sob <- lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -62.286176 | 18.946348 | 73.91996 | -3.2875031 | 0.0015486 |
|
| sample_typeNasal swab | -9.894010 | 13.143390 | 19.76996 | -0.7527746 | 0.4604558 | |
| sample_typeSputum | 5.066489 | 11.833705 | 23.07241 | 0.4281405 | 0.6725190 | |
| treatmentlyPMA | -3.032607 | 7.992068 | 64.29757 | -0.3794521 | 0.7056025 | |
| treatmentBenzonase | -4.719045 | 8.051585 | 65.30664 | -0.5861014 | 0.5598277 | |
| treatmentHost zero | -2.064590 | 8.208256 | 65.44300 | -0.2515260 | 0.8021956 | |
| treatmentMolysis | 10.862491 | 8.314322 | 65.52729 | 1.3064795 | 0.1959566 | |
| treatmentQIAamp | -2.491588 | 8.330143 | 65.53938 | -0.2991050 | 0.7658063 | |
| log10(Final_reads) | 12.532134 | 3.113656 | 67.56866 | 4.0248934 | 0.0001465 |
|
| sample_typeNasal swab:treatmentlyPMA | 4.202591 | 10.419319 | 64.56848 | 0.4033461 | 0.6880263 | |
| sample_typeSputum:treatmentlyPMA | 34.664316 | 10.599221 | 64.21630 | 3.2704588 | 0.0017287 |
|
| sample_typeNasal swab:treatmentBenzonase | 1.775047 | 10.042806 | 64.92798 | 0.1767481 | 0.8602564 | |
| sample_typeSputum:treatmentBenzonase | 62.518484 | 10.357810 | 64.45145 | 6.0358784 | 0.0000001 |
|
| sample_typeNasal swab:treatmentHost zero | 4.793943 | 9.787379 | 64.68743 | 0.4898086 | 0.6259264 | |
| sample_typeSputum:treatmentHost zero | 92.094185 | 10.559614 | 64.43191 | 8.7213589 | 0.0000000 |
|
| sample_typeNasal swab:treatmentMolysis | -7.289176 | 10.178107 | 65.15977 | -0.7161623 | 0.4764500 | |
| sample_typeSputum:treatmentMolysis | 87.197251 | 10.723044 | 64.48528 | 8.1317631 | 0.0000000 |
|
| sample_typeNasal swab:treatmentQIAamp | -3.726532 | 9.774308 | 64.66718 | -0.3812579 | 0.7042616 | |
| sample_typeSputum:treatmentQIAamp | 70.548734 | 10.400892 | 64.40670 | 6.7829501 | 0.0000000 |
|
cat("ANOVA for \n S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_sob %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 7547.487 | 3773.744 | 2 | 10.79325 | 30.28214 | 0.0000374 |
|
| treatment | 7333.548 | 1466.710 | 5 | 64.92720 | 11.76951 | 0.0000000 |
|
| log10(Final_reads) | 2018.806 | 2018.806 | 1 | 67.56866 | 16.19977 | 0.0001465 |
|
| sample_type:treatment | 19463.441 | 1946.344 | 10 | 64.59903 | 15.61830 | 0.0000000 |
|
cat("\n 0.06 p-value for the interaction term. May include the interaction term. \n")
##
## 0.06 p-value for the interaction term. May include the interaction term.
#Species richness - stratified
cat("Species richness at NS\n\n")
## Species richness at NS
cat("S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -26.0233860 | 9.340261 | 28 | -2.7861520 | 0.0094654 |
|
| treatmentlyPMA | -2.1153025 | 2.324810 | 28 | -0.9098818 | 0.3706512 | |
| treatmentBenzonase | -1.7637014 | 2.206148 | 28 | -0.7994486 | 0.4307605 | |
| treatmentHost zero | 8.8224892 | 2.495655 | 28 | 3.5351399 | 0.0014386 |
|
| treatmentMolysis | 4.5783098 | 2.217883 | 28 | 2.0642703 | 0.0483678 |
|
| treatmentQIAamp | 0.8360832 | 2.678401 | 28 | 0.3121575 | 0.7572336 | |
| log10(Final_reads) | 5.6349921 | 1.419898 | 28 | 3.9685903 | 0.0004571 |
|
cat("Species richness at BAL\n\n")
## Species richness at BAL
cat("S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -100.395549 | 25.504894 | 20.38095 | -3.9363249 | 0.0007925 |
|
| treatmentlyPMA | -5.839094 | 7.201885 | 17.39942 | -0.8107731 | 0.4284473 | |
| treatmentBenzonase | -10.667780 | 7.752673 | 18.42945 | -1.3760131 | 0.1853100 | |
| treatmentHost zero | -8.986746 | 8.091470 | 18.61512 | -1.1106444 | 0.2808621 | |
| treatmentMolysis | 3.342010 | 8.316699 | 18.72132 | 0.4018433 | 0.6923498 | |
| treatmentQIAamp | -10.097992 | 8.350027 | 18.73603 | -1.2093364 | 0.2415732 | |
| log10(Final_reads) | 19.520813 | 4.535759 | 19.97726 | 4.3037587 | 0.0003466 |
|
cat("Species richness at sputum\n\n")
## Species richness at sputum
cat("S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -166.73637 | 94.19800 | 20.78908 | -1.770063 | 0.0913826 | |
| treatmentlyPMA | 21.48046 | 13.48708 | 19.68002 | 1.592670 | 0.1271695 | |
| treatmentBenzonase | 41.90046 | 17.06102 | 19.99448 | 2.455918 | 0.0233254 |
|
| treatmentHost zero | 58.57768 | 28.77613 | 20.32431 | 2.035634 | 0.0550337 | |
| treatmentMolysis | 60.65374 | 33.57099 | 20.37080 | 1.806731 | 0.0855997 | |
| treatmentQIAamp | 41.44599 | 24.96239 | 20.26633 | 1.660337 | 0.1122410 | |
| log10(Final_reads) | 31.32813 | 16.05007 | 20.49824 | 1.951899 | 0.0647464 |
Shannon
#Shannon
cat("Shannon of all samples\n\n")
## Shannon of all samples
cat("Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_shannon <- lmer(data_shannon ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_shannon %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.2922817 | 0.8090108 | 73.88309 | 2.8334375 | 0.0059333 |
|
| sample_typeNasal swab | 0.4603976 | 0.5653783 | 19.28942 | 0.8143179 | 0.4254023 | |
| sample_typeSputum | 0.5529713 | 0.5086504 | 22.46019 | 1.0871343 | 0.2885070 | |
| treatmentlyPMA | 0.1726922 | 0.3406059 | 64.15772 | 0.5070146 | 0.6138828 | |
| treatmentBenzonase | 0.3896151 | 0.3431787 | 65.15496 | 1.1353129 | 0.2604034 | |
| treatmentHost zero | 0.2374929 | 0.3498614 | 65.28972 | 0.6788198 | 0.4996513 | |
| treatmentMolysis | 1.0035787 | 0.3543855 | 65.37303 | 2.8318841 | 0.0061444 |
|
| treatmentQIAamp | 0.4240802 | 0.3550603 | 65.38498 | 1.1943892 | 0.2366407 | |
| log10(Final_reads) | -0.2771226 | 0.1327452 | 67.39196 | -2.0876272 | 0.0406149 |
|
| sample_typeNasal swab:treatmentlyPMA | -0.4306354 | 0.4440629 | 64.42526 | -0.9697620 | 0.3357919 | |
| sample_typeSputum:treatmentlyPMA | 0.9407250 | 0.4517137 | 64.07751 | 2.0825690 | 0.0412874 |
|
| sample_typeNasal swab:treatmentBenzonase | -0.1913205 | 0.4280323 | 64.78076 | -0.4469766 | 0.6563828 | |
| sample_typeSputum:treatmentBenzonase | 1.1910341 | 0.4414361 | 64.30980 | 2.6980895 | 0.0088969 |
|
| sample_typeNasal swab:treatmentHost zero | 0.2059598 | 0.4171353 | 64.54324 | 0.4937482 | 0.6231601 | |
| sample_typeSputum:treatmentHost zero | 1.5319202 | 0.4500358 | 64.29022 | 3.4039963 | 0.0011480 |
|
| sample_typeNasal swab:treatmentMolysis | -0.6557050 | 0.4338096 | 65.01040 | -1.5115042 | 0.1355060 | |
| sample_typeSputum:treatmentMolysis | 0.9831440 | 0.4570035 | 64.34280 | 2.1512836 | 0.0352141 |
|
| sample_typeNasal swab:treatmentQIAamp | -0.2868142 | 0.4165773 | 64.52336 | -0.6885016 | 0.4936051 | |
| sample_typeSputum:treatmentQIAamp | 1.0954341 | 0.4432702 | 64.26548 | 2.4712562 | 0.0161247 |
|
cat("ANOVA for \n Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_shannon %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 3.3128614 | 1.6564307 | 2 | 10.63403 | 7.318283 | 0.0100286 |
|
| treatment | 6.1394953 | 1.2278991 | 5 | 64.78018 | 5.424986 | 0.0003143 |
|
| log10(Final_reads) | 0.9864384 | 0.9864384 | 1 | 67.39196 | 4.358187 | 0.0406149 |
|
| sample_type:treatment | 5.5684422 | 0.5568442 | 10 | 64.45592 | 2.460196 | 0.0147100 |
|
cat("\n 0.059 p-value for the interaction term. May include the interaction term. \n")
##
## 0.059 p-value for the interaction term. May include the interaction term.
#Shannon - stratified
cat("Shannon at NS\n\n")
## Shannon at NS
cat("Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.4705305 | 0.7141282 | 28 | 4.859814 | 0.0000407 |
|
| treatmentlyPMA | -0.3473353 | 0.1777480 | 28 | -1.954088 | 0.0607452 | |
| treatmentBenzonase | 0.1820609 | 0.1686754 | 28 | 1.079356 | 0.2896393 | |
| treatmentHost zero | 0.5077044 | 0.1908103 | 28 | 2.660782 | 0.0127593 |
|
| treatmentMolysis | 0.3999084 | 0.1695727 | 28 | 2.358331 | 0.0255726 |
|
| treatmentQIAamp | 0.2884031 | 0.2047825 | 28 | 1.408339 | 0.1700383 | |
| log10(Final_reads) | -0.3901164 | 0.1085611 | 28 | -3.593519 | 0.0012351 |
|
cat("Shannon at BAL\n\n")
## Shannon at BAL
cat("Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.4183961 | 1.4942036 | 20.85902 | 0.2800128 | 0.7822258 | |
| treatmentlyPMA | 0.0342340 | 0.4350346 | 17.07472 | 0.0786927 | 0.9381919 | |
| treatmentBenzonase | 0.0898923 | 0.4638505 | 18.70718 | 0.1937959 | 0.8484245 | |
| treatmentHost zero | -0.1102537 | 0.4832242 | 18.99852 | -0.2281627 | 0.8219575 | |
| treatmentMolysis | 0.6263137 | 0.4961366 | 19.16322 | 1.2623816 | 0.2219587 | |
| treatmentQIAamp | 0.0425762 | 0.4980493 | 19.18590 | 0.0854859 | 0.9327609 | |
| log10(Final_reads) | 0.0676641 | 0.2665336 | 20.80923 | 0.2538672 | 0.8020897 |
cat("Shannon at sputum\n\n")
## Shannon at sputum
cat("Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.1874597 | 3.2210139 | 20.47081 | 0.9895827 | 0.3339316 | |
| treatmentlyPMA | 1.1451368 | 0.4583906 | 19.43439 | 2.4981682 | 0.0215971 |
|
| treatmentBenzonase | 1.6303288 | 0.5807036 | 19.65341 | 2.8075057 | 0.0109962 |
|
| treatmentHost zero | 1.8676909 | 0.9809856 | 19.88476 | 1.9038922 | 0.0714978 | |
| treatmentMolysis | 2.1036052 | 1.1447010 | 19.91755 | 1.8376897 | 0.0810732 | |
| treatmentQIAamp | 1.6026662 | 0.8507368 | 19.84393 | 1.8838567 | 0.0743222 | |
| log10(Final_reads) | -0.3358544 | 0.5476150 | 20.00774 | -0.6133039 | 0.5465850 |
Simpson
#Simpson
cat("Inverse Simpson of all samples\n\n")
## Inverse Simpson of all samples
cat("Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_invsimpson <- lmer(data_invsimpson ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_invsimpson %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 6.1900726 | 3.8073924 | 73.92827 | 1.6258037 | 0.1082471 | |
| sample_typeNasal swab | 0.0267947 | 2.4197759 | 23.40479 | 0.0110732 | 0.9912589 | |
| sample_typeSputum | 0.3706250 | 2.2015520 | 28.41966 | 0.1683471 | 0.8675026 | |
| treatmentlyPMA | 0.0395484 | 1.6411132 | 64.36357 | 0.0240985 | 0.9808486 | |
| treatmentBenzonase | 0.1170930 | 1.6507957 | 65.70796 | 0.0709312 | 0.9436679 | |
| treatmentHost zero | -0.4346363 | 1.6825596 | 65.89013 | -0.2583185 | 0.7969663 | |
| treatmentMolysis | 3.0191007 | 1.7040764 | 66.00271 | 1.7716933 | 0.0810614 | |
| treatmentQIAamp | -0.1002100 | 1.7072867 | 66.01886 | -0.0586955 | 0.9533719 | |
| log10(Final_reads) | -0.6301862 | 0.6360133 | 68.71509 | -0.9908381 | 0.3252410 | |
| sample_typeNasal swab:treatmentlyPMA | -0.5155334 | 2.1386570 | 64.72775 | -0.2410548 | 0.8102746 | |
| sample_typeSputum:treatmentlyPMA | 3.7075291 | 2.1767394 | 64.25349 | 1.7032490 | 0.0933543 | |
| sample_typeNasal swab:treatmentBenzonase | 0.5017860 | 2.0602543 | 65.20211 | 0.2435554 | 0.8083399 | |
| sample_typeSputum:treatmentBenzonase | 8.5350057 | 2.1264111 | 64.56802 | 4.0138078 | 0.0001580 |
|
| sample_typeNasal swab:treatmentHost zero | 1.2602936 | 2.0085942 | 64.87749 | 0.6274505 | 0.5325661 | |
| sample_typeSputum:treatmentHost zero | 7.6738209 | 2.1678975 | 64.54748 | 3.5397526 | 0.0007487 |
|
| sample_typeNasal swab:treatmentMolysis | -2.1225377 | 2.0872801 | 65.50047 | -1.0168916 | 0.3129465 | |
| sample_typeSputum:treatmentMolysis | 4.9473161 | 2.2012695 | 64.62150 | 2.2474831 | 0.0280297 |
|
| sample_typeNasal swab:treatmentQIAamp | 0.4274437 | 2.0059761 | 64.84806 | 0.2130852 | 0.8319290 | |
| sample_typeSputum:treatmentQIAamp | 5.8931251 | 2.1353969 | 64.51036 | 2.7597329 | 0.0075215 |
|
cat("ANOVA for \n Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_invsimpson %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 69.706204 | 34.853102 | 2 | 10.95696 | 6.6310586 | 0.0129671 |
|
| treatment | 100.974561 | 20.194912 | 5 | 65.19749 | 3.8422304 | 0.0041134 |
|
| log10(Final_reads) | 5.160169 | 5.160169 | 1 | 68.71509 | 0.9817601 | 0.3252410 | |
| sample_type:treatment | 150.985221 | 15.098522 | 10 | 64.75927 | 2.8726047 | 0.0050196 |
|
cat("\n 0.098 p-value for the interaction term. May include the interaction term. \n")
##
## 0.098 p-value for the interaction term. May include the interaction term.
#Inverse Simpson - stratified
cat("Inverse Simpson at NS\n\n")
## Inverse Simpson at NS
cat("Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 8.4447083 | 1.6593436 | 28 | 5.089186 | 0.0000217 |
|
| treatmentlyPMA | -0.7192553 | 0.4130140 | 28 | -1.741479 | 0.0925778 | |
| treatmentBenzonase | 0.5953313 | 0.3919330 | 28 | 1.518962 | 0.1399841 | |
| treatmentHost zero | 1.0438388 | 0.4433654 | 28 | 2.354353 | 0.0258009 |
|
| treatmentMolysis | 1.0276348 | 0.3940179 | 28 | 2.608092 | 0.0144403 |
|
| treatmentQIAamp | 0.7559480 | 0.4758312 | 28 | 1.588689 | 0.1233601 | |
| log10(Final_reads) | -0.9695502 | 0.2522519 | 28 | -3.843580 | 0.0006381 |
|
cat("Inverse Simpson at BAL\n\n")
## Inverse Simpson at BAL
cat("Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.1656624 | 6.221700 | 15.68407 | 0.3480821 | 0.7324065 | |
| treatmentlyPMA | -0.2270228 | 2.097314 | 17.31870 | -0.1082446 | 0.9150459 | |
| treatmentBenzonase | -0.0433090 | 2.139150 | 20.28085 | -0.0202459 | 0.9840451 | |
| treatmentHost zero | -0.6874976 | 2.207391 | 20.67913 | -0.3114526 | 0.7585747 | |
| treatmentMolysis | 2.7094082 | 2.253493 | 20.84726 | 1.2023150 | 0.2427206 | |
| treatmentQIAamp | -0.4180640 | 2.260359 | 20.86632 | -0.1849547 | 0.8550505 | |
| log10(Final_reads) | 0.0336259 | 1.100980 | 14.45133 | 0.0305418 | 0.9760527 |
cat("Inverse Simpson at sputum\n\n")
## Inverse Simpson at sputum
cat("Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 3.0200634 | 17.763683 | 20.77908 | 0.1700134 | 0.8666440 | |
| treatmentlyPMA | 3.4188914 | 2.543428 | 19.66465 | 1.3442062 | 0.1941851 | |
| treatmentBenzonase | 8.1380906 | 3.217390 | 19.98083 | 2.5294078 | 0.0199477 |
|
| treatmentHost zero | 6.2223557 | 5.426604 | 20.31250 | 1.1466389 | 0.2648590 | |
| treatmentMolysis | 6.7570946 | 6.330812 | 20.35925 | 1.0673345 | 0.2983142 | |
| treatmentQIAamp | 4.9325861 | 4.707415 | 20.25420 | 1.0478334 | 0.3070591 | |
| log10(Final_reads) | -0.0225186 | 3.026712 | 20.48742 | -0.0074399 | 0.9941358 |
BPI
#BPI
cat("BPI of all samples\n\n")
## BPI of all samples
cat("BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_dbp <- lmer(dbp ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_dbp %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -0.2035448 | 0.2934875 | 73.98607 | -0.6935381 | 0.4901436 | |
| sample_typeNasal swab | -0.2746027 | 0.1918494 | 21.39063 | -1.4313447 | 0.1667807 | |
| sample_typeSputum | -0.1751176 | 0.1739103 | 25.68997 | -1.0069421 | 0.3233533 | |
| treatmentlyPMA | -0.1112415 | 0.1256539 | 63.94756 | -0.8853014 | 0.3793110 | |
| treatmentBenzonase | -0.2369262 | 0.1264658 | 65.21502 | -1.8734412 | 0.0654896 | |
| treatmentHost zero | -0.1391248 | 0.1289092 | 65.38675 | -1.0792465 | 0.2844429 | |
| treatmentMolysis | -0.4135086 | 0.1305640 | 65.49290 | -3.1670959 | 0.0023387 |
|
| treatmentQIAamp | -0.2351370 | 0.1308108 | 65.50813 | -1.7975344 | 0.0768596 | |
| log10(Final_reads) | 0.1754176 | 0.0487909 | 68.06101 | 3.5952957 | 0.0006086 |
|
| sample_typeNasal swab:treatmentlyPMA | 0.1304024 | 0.1637733 | 64.28956 | 0.7962372 | 0.4288246 | |
| sample_typeSputum:treatmentlyPMA | -0.2040959 | 0.1666573 | 63.84450 | -1.2246442 | 0.2252088 | |
| sample_typeNasal swab:treatmentBenzonase | 0.1025637 | 0.1578004 | 64.73833 | 0.6499583 | 0.5180190 | |
| sample_typeSputum:treatmentBenzonase | -0.2217425 | 0.1628249 | 64.14039 | -1.3618465 | 0.1780105 | |
| sample_typeNasal swab:treatmentHost zero | -0.0772609 | 0.1538230 | 64.43370 | -0.5022718 | 0.6171886 | |
| sample_typeSputum:treatmentHost zero | -0.4529917 | 0.1660001 | 64.11913 | -2.7288631 | 0.0081927 |
|
| sample_typeNasal swab:treatmentMolysis | 0.2090844 | 0.1598905 | 65.02325 | 1.3076721 | 0.1955892 | |
| sample_typeSputum:treatmentMolysis | -0.2825321 | 0.1685606 | 64.18786 | -1.6761456 | 0.0985744 | |
| sample_typeNasal swab:treatmentQIAamp | 0.1125011 | 0.1536206 | 64.40678 | 0.7323308 | 0.4666234 | |
| sample_typeSputum:treatmentQIAamp | -0.2947874 | 0.1635091 | 64.08537 | -1.8028812 | 0.0761087 |
cat("ANOVA for \n BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_dbp %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.2829103 | 0.1414552 | 2 | 10.48239 | 4.591210 | 0.0369787 |
|
| treatment | 0.9164438 | 0.1832888 | 5 | 64.73516 | 5.949003 | 0.0001381 |
|
| log10(Final_reads) | 0.3982546 | 0.3982546 | 1 | 68.06101 | 12.926151 | 0.0006086 |
|
| sample_type:treatment | 0.4601290 | 0.0460129 | 10 | 64.32239 | 1.493441 | 0.1624116 |
cat("\n 0.07 p-value for the interaction term. May include the interaction term. \n")
##
## 0.07 p-value for the interaction term. May include the interaction term.
#Inverse Simpson - stratified
cat("BPI at NS\n\n")
## BPI at NS
cat("BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -0.8393907 | 0.3486121 | 27.99482 | -2.4078071 | 0.0228843 |
|
| treatmentlyPMA | 0.0510698 | 0.0805909 | 25.70455 | 0.6336915 | 0.5318810 | |
| treatmentBenzonase | -0.1377638 | 0.0764151 | 25.81764 | -1.8028343 | 0.0831000 | |
| treatmentHost zero | -0.2586338 | 0.0873090 | 26.15521 | -2.9622802 | 0.0064248 |
|
| treatmentMolysis | -0.2183024 | 0.0767411 | 25.72237 | -2.8446604 | 0.0086028 |
|
| treatmentQIAamp | -0.1843464 | 0.0934638 | 25.80702 | -1.9723826 | 0.0593709 | |
| log10(Final_reads) | 0.2299546 | 0.0508548 | 26.50782 | 4.5217915 | 0.0001142 |
|
cat("BPI at BAL\n\n")
## BPI at BAL
cat("BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.3043331 | 0.5301349 | 20.47181 | 0.5740673 | 0.5721764 | |
| treatmentlyPMA | -0.0735485 | 0.1510204 | 17.01214 | -0.4870103 | 0.6324666 | |
| treatmentBenzonase | -0.1530734 | 0.1621430 | 18.28749 | -0.9440639 | 0.3574437 | |
| treatmentHost zero | -0.0421982 | 0.1691435 | 18.51848 | -0.2494819 | 0.8057344 | |
| treatmentMolysis | -0.3085461 | 0.1738005 | 18.65049 | -1.7752889 | 0.0921718 | |
| treatmentQIAamp | -0.1290205 | 0.1744898 | 18.66877 | -0.7394156 | 0.4688510 | |
| log10(Final_reads) | 0.0815548 | 0.0944078 | 20.17656 | 0.8638567 | 0.3978202 |
cat("BPI at sputum\n\n")
## BPI at sputum
cat("BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.0286770 | 1.1254512 | 20.89128 | 0.0254805 | 0.9799136 | |
| treatmentlyPMA | -0.2775806 | 0.1614755 | 19.71412 | -1.7190265 | 0.1012737 | |
| treatmentBenzonase | -0.3995336 | 0.2041454 | 20.07039 | -1.9571028 | 0.0643899 | |
| treatmentHost zero | -0.4751333 | 0.3441062 | 20.44288 | -1.3807754 | 0.1822586 | |
| treatmentMolysis | -0.5569118 | 0.4014067 | 20.49523 | -1.3874001 | 0.1802223 | |
| treatmentQIAamp | -0.4309461 | 0.2985350 | 20.37752 | -1.4435365 | 0.1640679 | |
| log10(Final_reads) | 0.1055072 | 0.1918616 | 20.63853 | 0.5499133 | 0.5882813 |
A4. Taxa beta diversity
Permanova (Taxa dist ~ log10(final reads) + sample_type + treatment + sample_type * treatment + subject_id) –> both stratified and nonstratified
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_rel, S.obs != 0 & sample_type %in% c("BAL", "Nasal swab", "Sputum"))
bray_perm_uni <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + treatment + subject_id,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp + subject_id,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_strata <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_inter <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type * treatment + log10(Final_reads) + subject_id,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_ns <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads) + subject_id,
data = subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab") %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_bal <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "BAL"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads) + subject_id,
data = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_spt <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads) + subject_id,
data = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
Results
Treatment 1/0
cat("\nUnivariate analysis\n")
##
## Univariate analysis
bray_perm_uni %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 56.251 | 0.000 |
|
| log10(Final reads) | 1 | 1.111 | 0.031 | 9.310 | 0.000 |
|
| Treatment | 5 | 1.213 | 0.033 | 2.033 | 0.001 |
|
| Subject | 10 | 11.639 | 0.321 | 9.751 | 0.000 |
|
| Residual | 74 | 8.833 | 0.244 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
Detailed treatment
cat("\nDetailed treatment\n")
##
## Detailed treatment
bray_perm %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 56.251 | 0.000 |
|
| log10(Final reads) | 1 | 1.111 | 0.031 | 9.310 | 0.000 |
|
| lyPMA | 1 | 0.140 | 0.004 | 1.172 | 0.287 | |
| Benzonase | 1 | 0.092 | 0.003 | 0.774 | 0.607 | |
| Host zero | 1 | 0.157 | 0.004 | 1.312 | 0.218 | |
| Molysis | 1 | 0.200 | 0.006 | 1.676 | 0.103 | |
| QIAamp | 1 | 0.624 | 0.017 | 5.229 | 0.000 |
|
| Subject | 10 | 11.639 | 0.321 | 9.751 | 0.000 |
|
| Residual | 74 | 8.833 | 0.244 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
Strata term
cat("\n Strata -detailed treatment\n")
##
## Strata -detailed treatment
bray_perm_strata %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 27.550 | 0.000 |
|
| log10(Final reads) | 1 | 1.111 | 0.031 | 4.560 | 0.000 |
|
| lyPMA | 1 | 0.140 | 0.004 | 0.574 | 0.530 | |
| Benzonase | 1 | 0.092 | 0.003 | 0.379 | 0.665 | |
| Host zero | 1 | 0.157 | 0.004 | 0.643 | 0.406 | |
| Molysis | 1 | 0.200 | 0.006 | 0.821 | 0.167 | |
| QIAamp | 1 | 0.624 | 0.017 | 2.561 | 0.003 |
|
| Residual | 84 | 20.472 | 0.565 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
Interaction term
cat("\n Interaction term \n")
##
## Interaction term
bray_perm_inter %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "sample_type:treatment" ~ 'Sample type X treatment',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 13.429 | 0.371 | 65.605 | 0 |
|
| Treatment | 5 | 1.184 | 0.033 | 2.314 | 0 |
|
| log10(Final reads) | 1 | 1.140 | 0.031 | 11.142 | 0 |
|
| Subject | 10 | 11.639 | 0.321 | 11.372 | 0 |
|
| Sample type X treatment | 10 | 2.283 | 0.063 | 2.231 | 0 |
|
| Residual | 64 | 6.550 | 0.181 | NA | NA | |
| Total | 92 | 36.225 | 1.000 | NA | NA |
Stratified
cat("\n Stratified - nasal swab \n")
##
## Stratified - nasal swab
bray_perm_ns %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.730 | 0.120 | 5.122 | 0.004 |
|
| Benzonase | 1 | 0.191 | 0.031 | 1.342 | 0.262 | |
| Host zero | 1 | 0.171 | 0.028 | 1.201 | 0.306 | |
| Molysis | 1 | 0.137 | 0.022 | 0.961 | 0.405 | |
| QIAamp | 1 | 0.254 | 0.042 | 1.780 | 0.148 | |
| log10(Final reads) | 1 | 0.428 | 0.070 | 3.007 | 0.036 |
|
| Subject id | 2 | 0.488 | 0.080 | 1.714 | 0.119 | |
| Residual | 26 | 3.704 | 0.607 | NA | NA | |
| Total | 34 | 6.103 | 1.000 | NA | NA |
cat("\n Stratified - BAL \n")
##
## Stratified - BAL
bray_perm_bal %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.100 | 0.010 | 1.144 | 0.308 | |
| Benzonase | 1 | 0.025 | 0.003 | 0.288 | 0.934 | |
| Host zero | 1 | 0.086 | 0.009 | 0.990 | 0.418 | |
| Molysis | 1 | 0.085 | 0.009 | 0.969 | 0.428 | |
| QIAamp | 1 | 0.229 | 0.024 | 2.625 | 0.035 |
|
| log10(Final reads) | 1 | 1.482 | 0.152 | 16.966 | 0.000 |
|
| Subject id | 4 | 6.241 | 0.641 | 17.864 | 0.000 |
|
| Residual | 17 | 1.485 | 0.153 | NA | NA | |
| Total | 27 | 9.734 | 1.000 | NA | NA |
cat("\n Stratified - sputum \n")
##
## Stratified - sputum
bray_perm_spt %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.139 | 0.020 | 2.565 | 0.046 |
|
| Benzonase | 1 | 0.037 | 0.005 | 0.688 | 0.635 | |
| Host zero | 1 | 0.171 | 0.025 | 3.150 | 0.018 |
|
| Molysis | 1 | 0.436 | 0.063 | 8.046 | 0.000 |
|
| QIAamp | 1 | 0.953 | 0.137 | 17.588 | 0.000 |
|
| log10(Final reads) | 1 | 0.172 | 0.025 | 3.174 | 0.017 |
|
| Subject id | 4 | 4.022 | 0.578 | 18.555 | 0.000 |
|
| Residual | 19 | 1.030 | 0.148 | NA | NA | |
| Total | 29 | 6.960 | 1.000 | NA | NA |
A5. DA analysis for taxa, by sample type and treatment
--> both stratified and nonstratified
--> look at level groups as well - phylum, etc.
Results
MaAslin without interaction
#DA analysis - MaAslin
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
#Running MaAslin for all sample without decontam
#for taxa differentially abundant by host depletion method, look to see which ones overlap with potential contaminant taxa
# Maaslin - # # y ~ log(final reads) + sample_type + treatment -----------
#all samples
#Checking number of bugs for the main text of the manuscript
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
maaslin_all %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 7 6 51 14
## molysis qiaamp sample_type
## 9 2 66
cat("Decreased bugs by each metadata")
## Decreased bugs by each metadata
maaslin_all %>% subset(., .$qval < 0.1 & .$coef < 0) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 1 1 1 2
## molysis sample_type
## 1 1
cat("Incrased")
## Incrased
maaslin_all %>% subset(., .$qval < 0.1 & .$coef > 0) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 6 5 50 12
## molysis qiaamp sample_type
## 8 2 65
cat("Number of differentially abundant bugs by each metadata in NS")
## Number of differentially abundant bugs by each metadata in NS
fit_data_ns %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 2 6 9 4
## molysis qiaamp
## 4 1
cat("Number of differentially abundant bugs by each metadata in BAL")
## Number of differentially abundant bugs by each metadata in BAL
fit_data_bal %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## < table of extent 0 >
cat("Number of differentially abundant bugs by each metadata in sputum")
## Number of differentially abundant bugs by each metadata in sputum
fit_data_spt %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 8 5 1 8
## molysis qiaamp
## 8 7
cat("\nAdding control data to both all and stratified MaAslin will help identify the actual contaminant\n")
##
## Adding control data to both all and stratified MaAslin will help identify the actual contaminant
#Making significance table for figure
# Define a function to make species names italicized
species_italic <- function(data) {
names <- gsub("_", " ", rownames(data))
names <- gsub("[]]|[[]", "", names)
names <- gsub(" sp", " sp.", names)
names <- gsub(" sp.", "* sp.", names)
names <- gsub(" group", "* group.", names)
names <- ifelse(grepl("[*]", names), paste("*", names, sep = ""), paste("*", names, "*", sep = ""))
rownames(data) <- names
data
}
#volcano plot of MaAslin with all data
ggplot(maaslin_all, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
#labs(tag = "A") +
ggtitle("MaAslin with treatment types")+
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#4daf4a", "#984ea3", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628")) +
guides(col = guide_legend(title = "Fixed effects", title.position = "top", nrow = 3))
# Make a significance table for each figure (top 20 taxa)
make_sig_table <- function(data) {
sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
sig_data$min <- apply(sig_data, 1, FUN = min)
sig_data <- sig_data[order(sig_data$min),] %>% .[1:20,]
sig_data[["feature"]] <- ifelse(sig_data[["feature"]] == "X.Collinsella._massiliensis", "[Collinsella]_massiliensis", sig_data[["feature"]])
sig_data_italic <- sig_data %>% rownames_to_column(var = "-") %>% column_to_rownames(var = "feature") %>% species_italic %>% select(-c("-", "min", "log10.Final_reads"))
sig_data_sig <- ifelse(sig_data_italic < 0.1, "*", NA)
return(list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig))
}
fit_data_bal <- make_sig_table(fit_data_bal)
fit_data_ns <- make_sig_table(fit_data_ns)
fit_data_spt <- make_sig_table(fit_data_spt)
fit_data_ns$data
cat("\n\nStratified Masslin - significant of each but by treatment in BAL\n")
##
##
## Stratified Masslin - significant of each but by treatment in BAL
fit_data_bal$data_sig
## benzonase host_zero lypma molysis qiaamp
## *Actinomyces radingae* NA NA NA NA NA
## *Bulleidia extructa* NA NA NA NA NA
## *Enterococcus avium* NA NA NA NA NA
## *Finegoldia magna* NA NA NA NA NA
## *Gemella morbillorum* NA NA NA NA NA
## *Peptostreptococcus* sp. MV1 NA NA NA NA NA
## *Peptostreptococcus stomatis* NA NA NA NA NA
## *Staphylococcus epidermidis* NA NA NA NA NA
## *Staphylococcus schweitzeri* NA NA NA NA NA
## *Varibaculum cambriense* NA NA NA NA NA
## *Actinomyces naeslundii* NA NA NA NA NA
## *Actinomyces* sp. oral taxon 181 NA NA NA NA NA
## *Candida dubliniensis* NA NA NA NA NA
## *Corynebacterium tuberculostearicum* NA NA NA NA NA
## *Gemella asaccharolytica* NA NA NA NA NA
## *Gemella sanguinis* NA NA NA NA NA
## *Rothia mucilaginosa* NA NA NA NA NA
## *Gemella haemolysans* NA NA NA NA NA
## *Negativicoccus succinicivorans* NA NA NA NA NA
## *Sutterella parvirubra* NA NA NA NA NA
cat("\n\nStratified Masslin - significant of each but by treatment in nasal swab\n")
##
##
## Stratified Masslin - significant of each but by treatment in nasal swab
fit_data_ns$data_sig
## benzonase host_zero lypma molysis qiaamp
## *Cupriavidus* sp. "*" "*" "*" "*" NA
## *Streptococcus oralis* NA NA NA "*" NA
## *Staphylococcus epidermidis* NA NA "*" NA NA
## *Sutterella parvirubra* NA "*" "*" NA NA
## *Aeriscardovia aeriphila* NA NA NA NA NA
## *Rothia mucilaginosa* NA "*" NA "*" NA
## *Corynebacterium atypicum* NA NA NA NA NA
## *Propionibacterium namnetense* NA NA NA NA NA
## *Slackia isoflavoniconvertens* NA NA NA NA NA
## *Staphylococcus aureus* NA NA NA NA NA
## *Cutibacterium avidum* NA NA NA NA NA
## *Staphylococcus argenteus* NA "*" NA NA NA
## *Collinsella massiliensis* NA NA NA NA "*"
## *Limnochorda pilosa* NA NA NA "*" NA
## *Corynebacterium accolens* NA NA "*" NA NA
## *Malassezia restricta* "*" NA NA NA NA
## *Collinsella intestinalis* NA "*" NA NA NA
## *Finegoldia magna* NA "*" NA NA NA
## *Gemella haemolysans* NA NA NA NA NA
## *Thermoleophilum album* NA NA NA NA NA
cat("\n\nStratified Masslin - significant of each but by treatment in Sputum\n")
##
##
## Stratified Masslin - significant of each but by treatment in Sputum
fit_data_spt$data_sig
## benzonase host_zero lypma molysis qiaamp
## *Streptococcus infantis* "*" NA "*" NA NA
## *Gemella haemolysans* "*" NA "*" NA NA
## *Bifidobacterium longum* "*" "*" NA "*" "*"
## *Eubacterium sulci* "*" "*" NA "*" "*"
## *Pseudomonas aeruginosa* group. NA NA NA "*" "*"
## *Granulicatella elegans* "*" NA "*" NA NA
## *Rothia dentocariosa* "*" NA "*" NA NA
## *Streptococcus* sp. F0442 "*" NA "*" NA NA
## *Neisseria flavescens* "*" NA "*" NA NA
## *Rothia mucilaginosa* NA NA "*" NA NA
## *Actinomyces naeslundii* NA "*" NA "*" "*"
## *Denitrobacterium detoxificans* NA "*" NA "*" "*"
## *Actinomyces* sp. oral taxon 897 NA "*" NA "*" "*"
## *Prevotella melaninogenica* NA NA NA "*" "*"
## *Gemmata obscuriglobus* NA NA NA "*" NA
## *Streptococcus gordonii* NA NA "*" NA NA
## *Corynebacterium durum* NA NA NA NA NA
## *Actinomyces* sp. HPA0247 NA NA NA NA NA
## *Streptococcus sanguinis* NA NA NA NA NA
## *Actinomyces massiliensis* NA NA NA NA NA
Results After adding control data, MaAslin needs to be reanalyzed. Adding controls (mock communities) for each treatment group will show more statistically valid results in y ~ log(final reads) + sample_type + treatment, (re = subject_id))
MaAslin with interaction
#Generating interaction term
sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(sample_data(phyloseq_rel_nz)$sample_type, sample_data(phyloseq_rel_nz)$treatment, sep = "*")
#Checking number of bugs for the main text of the manuscript
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
maaslin_all %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 7 6 51 14
## molysis qiaamp sample_type
## 9 2 66
#Checking number of bugs differentially abundance with interaction term
cat("Number of differentially abundant bugs by each metadata, MaAslin with interaction term")
## Number of differentially abundant bugs by each metadata, MaAslin with interaction term
maaslin_interaction %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## log10.Final_reads sample_type sampletype_treatment
## 6 73 172
## treatment
## 24
maaslin_interaction
#interaction term - ggplot
ggplot(maaslin_interaction, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
#labs(tag = "A") +
ggtitle("MaAslin with interaction term")+
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3")) +
guides(col = guide_legend(title = "Fixed effects", title.position = "top", nrow = 3))
cat("\n\nMasslin run results with q < 0.1 by variable, with interaction term\n")
##
##
## Masslin run results with q < 0.1 by variable, with interaction term
maaslin_interaction %>% subset(., .$qval < 0.1) %>% .$name %>% table
## < table of extent 0 >
cat("\n\nMasslin run results with q < 0.1 by variable, with interaction term\n")
##
##
## Masslin run results with q < 0.1 by variable, with interaction term
maaslin_interaction %>% subset(., .$qval < 0.1 & .$coef > 0 & .$metadata == "treatment")
maaslin_interaction %>% subset(., .$qval < 0.1 & .$coef > 0 & .$metadata == "treatment") %>% select(c("feature", "value", "coef", "qval"))
cat("\n\nThese taxa were highly prevenlent by each treatmment.\n")
##
##
## These taxa were highly prevenlent by each treatmment.
cat("But they are not contaminants, if 1) they are present in most of the treatments, or, 2) they are included in other interaction terms.\n")
## But they are not contaminants, if 1) they are present in most of the treatments, or, 2) they are included in other interaction terms.
cat("\n\nChecking Case 1 \n")
##
##
## Checking Case 1
maaslin_interaction %>% subset(., .$qval < 0.1 & .$coef > 0 & .$metadata == "treatment") %>% select(c("feature", "value", "coef", "qval")) %>% .$feature %>% table
## .
## Candida_albicans Candida_dubliniensis Candida_parapsilosis
## 5 5 5
## Cupriavidus_sp Gemella_haemolysans Sutterella_parvirubra
## 3 2 3
cat("\nMost of taxa were treatment specific, however they were found on most of treatments. They could be lab contaminants or newly found taxa. Further analysis is required with control groups.\n")
##
## Most of taxa were treatment specific, however they were found on most of treatments. They could be lab contaminants or newly found taxa. Further analysis is required with control groups.
cat("\n\nChecking hypothesis 2\n")
##
##
## Checking hypothesis 2
subset(maaslin_interaction,
maaslin_interaction$feature %in% (maaslin_interaction %>%
subset(., .$qval < 0.1 & .$metadata == "treatment") %>%
.$feature %>% table %>% names)) %>%
subset(., .$metadata == "sampletype_treatment" & .$qval < 0.1) %>% select(c("feature", "value", "coef", "qval"))
cat("\nAmong taxa that were treatment specific, some were `sampletype * treatment` specific at the same time. This may be due to the bacterial community is slightly weighted to the treated samples, as they have larger sample numbers. In detail, they were sample specific, therefore it is less likely they are contaminants appeared during the host-DNA depletion step.\n")
##
## Among taxa that were treatment specific, some were `sampletype * treatment` specific at the same time. This may be due to the bacterial community is slightly weighted to the treated samples, as they have larger sample numbers. In detail, they were sample specific, therefore it is less likely they are contaminants appeared during the host-DNA depletion step.
MaAslin at Genus level
#DA analysis - MaAslin
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
phyloseq_rel_nz_genus <- tax_glom(phyloseq_rel_nz, "Genus")
phyloseq_rel_nz_family <- tax_glom(phyloseq_rel_nz, "Family")
taxa_names(phyloseq_rel_nz_genus) <- tax_table(phyloseq_rel_nz_genus) %>% data.frame %>% .$Genus %>% gsub("g__", "", .)
taxa_names(phyloseq_rel_nz_family) <- tax_table(phyloseq_rel_nz_family) %>% data.frame %>% .$Family %>% gsub("g__", "", .)
#Running MaAslin for all sample without decontam
#for taxa differentially abundant by host depletion method, look to see which ones overlap with potential contaminant taxa
MaAslin at Family level
#Checking number of bugs for the main text of the manuscript
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
maaslin_all %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 6 5 19 3
## molysis qiaamp sample_type
## 8 2 26
cat("Decreased bugs by each metadata")
## Decreased bugs by each metadata
maaslin_all %>% subset(., .$qval < 0.1 & .$coef < 0) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads molysis
## 1 1 1 2
## sample_type
## 1
cat("Incrased")
## Incrased
maaslin_all %>% subset(., .$qval < 0.1 & .$coef > 0) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 5 4 18 3
## molysis qiaamp sample_type
## 6 2 25
#Checking number of bugs for the main text of the manuscript
cat("Number of differentially abundant bugs by each metadata in NS")
## Number of differentially abundant bugs by each metadata in NS
fit_data_ns %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 2 3 5 4
## molysis qiaamp
## 5 1
cat("Number of differentially abundant bugs by each metadata in BAL")
## Number of differentially abundant bugs by each metadata in BAL
fit_data_bal %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 1 1 6 2
## molysis qiaamp
## 1 1
cat("Number of differentially abundant bugs by each metadata in sputum")
## Number of differentially abundant bugs by each metadata in sputum
fit_data_spt %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero lypma molysis qiaamp
## 5 2 3 3 2
cat("\nAdding control data to both all and stratified MaAslin will help identify the actual contaminant\n")
##
## Adding control data to both all and stratified MaAslin will help identify the actual contaminant
#volcano plot of MaAslin with all data
ggplot(maaslin_all, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
#labs(tag = "A") +
ggtitle("MaAslin with treatment types")+
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#4daf4a", "#984ea3", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628")) +
guides(col = guide_legend(title = "Fixed effects", title.position = "top", nrow = 3))
# Make a significance table for each figure (top 20 taxa)
make_sig_table <- function(data) {
sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
sig_data$min <- apply(sig_data, 1, FUN = min)
sig_data <- sig_data[order(sig_data$min),] %>% .[1:10,]
sig_data[["feature"]] <- ifelse(sig_data[["feature"]] == "X.Collinsella._massiliensis", "[Collinsella]_massiliensis", sig_data[["feature"]])
sig_data_italic <- sig_data %>% rownames_to_column(var = "-") %>% column_to_rownames(var = "feature") %>% species_italic %>% select(-c("-", "min", "log10.Final_reads"))
sig_data_sig <- ifelse(sig_data_italic < 0.1, "*", NA)
return(list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig))
}
fit_data_bal <- make_sig_table(fit_data_bal)
fit_data_ns <- make_sig_table(fit_data_ns)
fit_data_spt <- make_sig_table(fit_data_spt)
cat("\n\nStratified Masslin - significant of each but by treatment in BAL\n")
##
##
## Stratified Masslin - significant of each but by treatment in BAL
fit_data_bal$data_sig
## benzonase host_zero lypma molysis qiaamp
## * f__Debaryomycetaceae * "*" "*" "*" "*" "*"
## * f__Actinomycetaceae * NA NA NA NA NA
## * f__Peptostreptococcaceae * NA NA NA NA NA
## * f__Peptoniphilaceae * NA NA "*" NA NA
## * f__Erysipelotrichaceae * NA NA NA NA NA
## * f__Bifidobacteriaceae * NA NA NA NA NA
## * f__Micrococcaceae * NA NA NA NA NA
## * f__Corynebacteriaceae * NA NA NA NA NA
## * f__Bacillales_unclassified * NA NA NA NA NA
## * f__Sutterellaceae * NA NA NA NA NA
cat("\n\nStratified Masslin - significant of each but by treatment in nasal swab\n")
##
##
## Stratified Masslin - significant of each but by treatment in nasal swab
fit_data_ns$data_sig
## benzonase host_zero lypma molysis qiaamp
## * f__Burkholderiaceae * "*" "*" "*" "*" "*"
## * f__Staphylococcaceae * NA NA "*" NA NA
## * f__Sutterellaceae * NA "*" "*" NA NA
## * f__Bifidobacteriaceae * NA NA NA NA NA
## * f__Micrococcaceae * NA "*" NA "*" NA
## * f__Eggerthellaceae * NA NA NA NA NA
## * f__Malasseziaceae * "*" NA NA "*" NA
## * f__Limnochordaceae * NA NA NA "*" NA
## * f__Corynebacteriaceae * NA NA "*" NA NA
## * f__Streptococcaceae * NA NA NA "*" NA
cat("\n\nStratified Masslin - significant of each but by treatment in Sputum\n")
##
##
## Stratified Masslin - significant of each but by treatment in Sputum
fit_data_spt$data_sig
## benzonase host_zero lypma molysis qiaamp
## * f__Carnobacteriaceae * "*" NA "*" NA NA
## * f__Pseudomonadaceae * NA "*" NA "*" "*"
## * f__Micrococcaceae * NA NA "*" NA NA
## * f__Bifidobacteriaceae * "*" NA NA NA NA
## * f__Corynebacteriaceae * NA "*" NA "*" "*"
## * f__Erysipelotrichaceae * "*" NA NA NA NA
## * f__Gemmataceae * NA NA NA "*" NA
## * f__Neisseriaceae * "*" NA "*" NA NA
## * f__Bacillales_unclassified * "*" NA NA NA NA
## * f__Ectothiorhodospiraceae * NA NA NA NA NA
A6. Decontam - stratified by treatment
--> both stratified and nonstratified
input of DNA concentration: 16S qPCR data
https://github.com/benjjneb/decontam/issues/33
Ben Callahan: But in the more limited testing on qPCR data the method still seems to work, and other publications report strong patterns of inverse frequency of contaminants using qPCR data - which is the pattern the frequency method relies on.
Strategy:
2.4.1. run decontam for all samples (common contaminants, by extraction)
2.4.2. stratify decontam analysis per each treatment method (contaminants by depletion methods)
Results
decontam - all sample
# Decontam package --------------------------------------------------------
# common contaminants across all the treatment methods
#Decontam - were there any contaminants?#
sample_data(phyloseq$phyloseq_rel)$is.neg <- grepl("Neg. ext.", sample_data(phyloseq$phyloseq_rel)$sample_type)
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_rel, S.obs != 0)
#With all sampels
dec_f_all <- isContaminant(phyloseq_rel_nz, method="frequency", conc="DNA_bac_well")
dec_p_all <- isContaminant(phyloseq_rel_nz, method="prevalence", neg="is.neg", threshold=0.5)
dec_c_all <- isContaminant(phyloseq_rel_nz, method="combined", neg="is.neg", conc = "DNA_bac_well")
cat("decontam frequency - all sample")
## decontam frequency - all sample
dec_f_all %>% subset(.,.$contaminant)
cat("decontam prevalence - all sample")
## decontam prevalence - all sample
dec_p_all %>% subset(.,.$contaminant)
cat("decontam combined - all sample")
## decontam combined - all sample
dec_c_all %>% subset(.,.$contaminant)
decontam - stratified
#Stratified by sample type
cat("decontam prevalence - BAL")
## decontam prevalence - BAL
subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Neg. ext.")) %>%
isContaminant(., method="prevalence", neg = "is.neg", threshold = 0.5) %>% subset(.,.$contaminant)
cat("decontam prevalence - Nasal swab")
## decontam prevalence - Nasal swab
subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal swab", "Neg. ext.")) %>%
isContaminant(., method="prevalence", neg = "is.neg", threshold = 0.5) %>% subset(.,.$contaminant)
cat("decontam prevalence - Sputum")
## decontam prevalence - Sputum
subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Neg. ext.")) %>%
isContaminant(., method="prevalence", neg = "is.neg", threshold = 0.5) %>% subset(.,.$contaminant)
cat("decontam frequency - BAL")
## decontam frequency - BAL
subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Neg. ext.")) %>%
isContaminant(method="frequency", conc="DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam frequency - Nasal swab")
## decontam frequency - Nasal swab
subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal swab", "Neg. ext.")) %>%
isContaminant(method="frequency", conc="DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam frequency - Sputum")
## decontam frequency - Sputum
subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Neg. ext.")) %>%
isContaminant(method="frequency", conc="DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam combined - BAL")
## decontam combined - BAL
subset_samples(phyloseq_rel_nz, sample_type %in% c("BAL", "Neg. ext.")) %>%
isContaminant(method="combined", neg="is.neg", conc = "DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam combined - Nasal swab")
## decontam combined - Nasal swab
subset_samples(phyloseq_rel_nz, sample_type %in% c("Nasal swab", "Neg. ext.")) %>%
isContaminant(method="combined", neg="is.neg", conc = "DNA_bac_well") %>% subset(.,.$contaminant)
cat("decontam combined - Sputum")
## decontam combined - Sputum
subset_samples(phyloseq_rel_nz, sample_type %in% c("Sputum", "Neg. ext.")) %>%
isContaminant(method="combined", neg="is.neg", conc = "DNA_bac_well") %>% subset(.,.$contaminant)
A7. LM of function alpha diversity
--> both stratified and nonstratified
Result
Function richness
sample_data <- sample_data(phyloseq$phyloseq_path_rpkm) %>% data.frame(check.names = F) %>% subset(., !is.nan(.$simpson))
cat("Species richness of all samples\n\n")
## Species richness of all samples
cat("S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_sob <- lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer(S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -271.32533 | 63.94609 | 72.47916 | -4.2430322 | 0.0000643 |
|
| sample_typeNasal swab | 66.71179 | 39.44578 | 32.64336 | 1.6912273 | 0.1003209 | |
| sample_typeSputum | 142.80087 | 36.31835 | 41.80438 | 3.9319203 | 0.0003111 |
|
| treatmentlyPMA | 59.31768 | 31.18858 | 64.39962 | 1.9019036 | 0.0616583 | |
| treatmentBenzonase | 96.59262 | 31.65626 | 66.97650 | 3.0512965 | 0.0032643 |
|
| treatmentHost zero | 129.84121 | 32.25492 | 67.36194 | 4.0254694 | 0.0001466 |
|
| treatmentMolysis | 150.32258 | 32.65321 | 67.58822 | 4.6036081 | 0.0000189 |
|
| treatmentQIAamp | 86.27368 | 32.71224 | 67.61999 | 2.6373516 | 0.0103578 |
|
| log10(Final_reads) | 52.77944 | 11.01007 | 70.54314 | 4.7937427 | 0.0000088 |
|
| sample_typeNasal swab:treatmentlyPMA | -23.81209 | 39.44179 | 64.87664 | -0.6037273 | 0.5481301 | |
| sample_typeSputum:treatmentlyPMA | -5.02253 | 39.40167 | 62.83470 | -0.1274700 | 0.8989755 | |
| sample_typeNasal swab:treatmentBenzonase | -80.25929 | 38.01990 | 65.40988 | -2.1109811 | 0.0386006 |
|
| sample_typeSputum:treatmentBenzonase | -52.43718 | 38.65224 | 63.57018 | -1.3566400 | 0.1796940 | |
| sample_typeNasal swab:treatmentHost zero | -106.90271 | 36.81521 | 64.23327 | -2.9037647 | 0.0050492 |
|
| sample_typeSputum:treatmentHost zero | -76.35866 | 38.92443 | 62.75019 | -1.9617158 | 0.0542352 | |
| sample_typeNasal swab:treatmentMolysis | -106.97855 | 38.54815 | 66.00383 | -2.7751931 | 0.0071693 |
|
| sample_typeSputum:treatmentMolysis | -108.15918 | 39.34418 | 62.63915 | -2.7490513 | 0.0077999 |
|
| sample_typeNasal swab:treatmentQIAamp | -75.62038 | 36.68939 | 64.06591 | -2.0610968 | 0.0433576 |
|
| sample_typeSputum:treatmentQIAamp | -40.59820 | 38.59881 | 63.08680 | -1.0517993 | 0.2969039 |
cat("ANOVA for \n S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## S.obs ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_sob %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 31544.55 | 15772.274 | 2 | 10.06740 | 10.123058 | 0.0038929 |
|
| treatment | 33352.07 | 6670.414 | 5 | 64.14678 | 4.281246 | 0.0020153 |
|
| log10(Final_reads) | 35804.04 | 35804.040 | 1 | 70.54314 | 22.979969 | 0.0000088 |
|
| sample_type:treatment | 29095.43 | 2909.543 | 10 | 63.26761 | 1.867421 | 0.0667651 |
cat("\n 0.06 p-value for the interaction term. May include the interaction term. \n")
##
## 0.06 p-value for the interaction term. May include the interaction term.
#Species richness - stratified
cat("Species richness at NS\n\n")
## Species richness at NS
cat("S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 68.35891 | 68.76371 | 28 | 0.9941131 | 0.3286817 | |
| treatmentlyPMA | 13.88909 | 17.11543 | 28 | 0.8114955 | 0.4239262 | |
| treatmentBenzonase | 20.95703 | 16.24183 | 28 | 1.2903126 | 0.2074965 | |
| treatmentHost zero | 56.43052 | 18.37320 | 28 | 3.0713495 | 0.0047048 |
|
| treatmentMolysis | 51.56131 | 16.32822 | 28 | 3.1578029 | 0.0037880 |
|
| treatmentQIAamp | 54.41631 | 19.71859 | 28 | 2.7596444 | 0.0100871 |
|
| log10(Final_reads) | 12.25116 | 10.45340 | 28 | 1.1719787 | 0.2510816 |
cat("Species richness at BAL\n\n")
## Species richness at BAL
cat("S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -640.83129 | 99.14027 | 19.00295 | -6.4638848 | 0.0000034 |
|
| treatmentlyPMA | 14.46191 | 33.76511 | 18.36963 | 0.4283092 | 0.6734060 | |
| treatmentBenzonase | 16.49562 | 35.65062 | 19.98741 | 0.4627021 | 0.6485757 | |
| treatmentHost zero | 39.81066 | 36.93641 | 19.98154 | 1.0778162 | 0.2939563 | |
| treatmentMolysis | 54.18626 | 37.78713 | 19.92262 | 1.4339874 | 0.1670859 | |
| treatmentQIAamp | -10.73947 | 37.91284 | 19.91125 | -0.2832674 | 0.7798961 | |
| log10(Final_reads) | 124.09731 | 17.94627 | 17.34430 | 6.9149360 | 0.0000022 |
|
cat("Species richness at sputum\n\n")
## Species richness at sputum
cat("S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## S.obs ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(S.obs ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -127.58303 | 198.98655 | 21.38568 | -0.6411641 | 0.5282282 | |
| treatmentlyPMA | 54.38241 | 28.77622 | 19.88708 | 1.8898385 | 0.0734460 | |
| treatmentBenzonase | 44.29211 | 36.29072 | 20.41549 | 1.2204804 | 0.2361930 | |
| treatmentHost zero | 53.75292 | 61.00733 | 20.95720 | 0.8810895 | 0.3882627 | |
| treatmentMolysis | 42.48496 | 71.13867 | 21.03201 | 0.5972132 | 0.5567405 | |
| treatmentQIAamp | 45.90423 | 52.95334 | 20.86328 | 0.8668807 | 0.3958683 | |
| log10(Final_reads) | 52.61786 | 33.96577 | 21.23460 | 1.5491436 | 0.1361234 |
Function Shannon
#Shannon
cat("Shannon of all samples\n\n")
## Shannon of all samples
cat("Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_shannon <- lmer(data_shannon ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_shannon %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.5913112 | 0.1886495 | 72.61690 | 3.1344432 | 0.0024844 |
|
| sample_typeNasal swab | 0.3690633 | 0.1053813 | 38.98621 | 3.5021710 | 0.0011737 |
|
| sample_typeSputum | 0.2993038 | 0.0997287 | 55.53917 | 3.0011805 | 0.0040232 |
|
| treatmentlyPMA | 0.2036799 | 0.0942992 | 65.15176 | 2.1599339 | 0.0344627 |
|
| treatmentBenzonase | 0.0994949 | 0.0949856 | 68.98341 | 1.0474738 | 0.2985374 | |
| treatmentHost zero | 0.2573740 | 0.0966602 | 69.52729 | 2.6626671 | 0.0096242 |
|
| treatmentMolysis | 0.3398170 | 0.0977799 | 69.83854 | 3.4753267 | 0.0008813 |
|
| treatmentQIAamp | 0.0878239 | 0.0979461 | 69.88172 | 0.8966555 | 0.3729814 | |
| log10(Final_reads) | -0.0096706 | 0.0325805 | 72.89285 | -0.2968225 | 0.7674461 | |
| sample_typeNasal swab:treatmentlyPMA | -0.2226470 | 0.1190898 | 65.91555 | -1.8695722 | 0.0659859 | |
| sample_typeSputum:treatmentlyPMA | -0.0936705 | 0.1196459 | 62.61750 | -0.7828973 | 0.4366384 | |
| sample_typeNasal swab:treatmentBenzonase | -0.0512598 | 0.1146272 | 66.66595 | -0.4471872 | 0.6561894 | |
| sample_typeSputum:treatmentBenzonase | 0.0043312 | 0.1171417 | 63.75882 | 0.0369743 | 0.9706210 | |
| sample_typeNasal swab:treatmentHost zero | -0.2539966 | 0.1113714 | 64.80534 | -2.2806270 | 0.0258673 |
|
| sample_typeSputum:treatmentHost zero | -0.2117509 | 0.1182234 | 62.48276 | -1.7911090 | 0.0781193 | |
| sample_typeNasal swab:treatmentMolysis | -0.3660377 | 0.1160263 | 67.46898 | -3.1547817 | 0.0023993 |
|
| sample_typeSputum:treatmentMolysis | -0.2802693 | 0.1195318 | 62.32165 | -2.3447254 | 0.0222411 |
|
| sample_typeNasal swab:treatmentQIAamp | -0.1937200 | 0.1110482 | 64.48281 | -1.7444684 | 0.0858422 | |
| sample_typeSputum:treatmentQIAamp | -0.0639711 | 0.1171312 | 63.00030 | -0.5461489 | 0.5868925 |
cat("ANOVA for \n Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## Shannon ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_shannon %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.1823346 | 0.0911673 | 2 | 9.000626 | 6.3262277 | 0.0192428 |
|
| treatment | 0.2025353 | 0.0405071 | 5 | 64.449929 | 2.8108418 | 0.0233602 |
|
| log10(Final_reads) | 0.0012697 | 0.0012697 | 1 | 72.892849 | 0.0881036 | 0.7674461 | |
| sample_type:treatment | 0.2764134 | 0.0276413 | 10 | 63.238098 | 1.9180715 | 0.0589214 |
cat("\n 0.059 p-value for the interaction term. May include the interaction term. \n")
##
## 0.059 p-value for the interaction term. May include the interaction term.
#Shannon - stratified
cat("Shannon at NS\n\n")
## Shannon at NS
cat("Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.9021024 | 0.1888432 | 28 | 4.7769924 | 0.0000510 |
|
| treatmentlyPMA | -0.0120429 | 0.0470035 | 28 | -0.2562131 | 0.7996594 | |
| treatmentBenzonase | 0.0493991 | 0.0446043 | 28 | 1.1074968 | 0.2775028 | |
| treatmentHost zero | -0.0017956 | 0.0504576 | 28 | -0.0355870 | 0.9718642 | |
| treatmentMolysis | -0.0302035 | 0.0448416 | 28 | -0.6735608 | 0.5061130 | |
| treatmentQIAamp | -0.1176817 | 0.0541524 | 28 | -2.1731574 | 0.0383841 |
|
| log10(Final_reads) | -0.0007741 | 0.0287078 | 28 | -0.0269635 | 0.9786801 |
cat("Shannon at BAL\n\n")
## Shannon at BAL
cat("Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.3164981 | 0.4384386 | 19.99999 | 0.7218756 | 0.4787266 | |
| treatmentlyPMA | 0.1566219 | 0.1595240 | 20.00000 | 0.9818075 | 0.3379233 | |
| treatmentBenzonase | 0.0081445 | 0.1629779 | 20.00000 | 0.0499731 | 0.9606395 | |
| treatmentHost zero | 0.1579336 | 0.1676636 | 20.00000 | 0.9419670 | 0.3574494 | |
| treatmentMolysis | 0.2354040 | 0.1708188 | 20.00000 | 1.3780916 | 0.1833989 | |
| treatmentQIAamp | -0.0173032 | 0.1712883 | 20.00000 | -0.1010181 | 0.9205420 | |
| log10(Final_reads) | 0.0484117 | 0.0775187 | 19.99999 | 0.6245166 | 0.5393481 |
cat("Shannon at sputum\n\n")
## Shannon at sputum
cat("Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Shannon ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_shannon ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 1.9046747 | 0.3134676 | 20.38905 | 6.076146 | 0.0000056 |
|
| treatmentlyPMA | 0.2040040 | 0.0444997 | 19.33828 | 4.584393 | 0.0001943 |
|
| treatmentBenzonase | 0.2510413 | 0.0564000 | 19.52563 | 4.451090 | 0.0002582 |
|
| treatmentHost zero | 0.3368492 | 0.0953245 | 19.72386 | 3.533710 | 0.0021207 |
|
| treatmentMolysis | 0.4059052 | 0.1112411 | 19.75200 | 3.648878 | 0.0016224 |
|
| treatmentQIAamp | 0.2702564 | 0.0826606 | 19.68885 | 3.269471 | 0.0038945 |
|
| log10(Final_reads) | -0.1837105 | 0.0532273 | 19.82942 | -3.451430 | 0.0025476 |
|
Function Simpson
#Simpson
cat("Inverse Simpson of all samples\n\n")
## Inverse Simpson of all samples
cat("Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_invsimpson <- lmer(data_invsimpson ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_invsimpson %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.5041672 | 0.3100373 | 72.98246 | 8.0769868 | 0.0000000 |
|
| sample_typeNasal swab | 0.2824300 | 0.1602334 | 39.89109 | 1.7626164 | 0.0856259 | |
| sample_typeSputum | 0.3733177 | 0.1575829 | 65.69179 | 2.3690243 | 0.0207839 |
|
| treatmentlyPMA | 0.3355368 | 0.1587771 | 66.80622 | 2.1132571 | 0.0383176 |
|
| treatmentBenzonase | 0.0394673 | 0.1582723 | 71.20768 | 0.2493632 | 0.8037978 | |
| treatmentHost zero | 0.2243663 | 0.1607711 | 71.74004 | 1.3955639 | 0.1671506 | |
| treatmentMolysis | 0.3476547 | 0.1624552 | 72.01996 | 2.1400038 | 0.0357425 |
|
| treatmentQIAamp | 0.0491110 | 0.1627061 | 72.05712 | 0.3018387 | 0.7636445 | |
| log10(Final_reads) | -0.1584764 | 0.0531196 | 70.88174 | -2.9833884 | 0.0039077 |
|
| sample_typeNasal swab:treatmentlyPMA | -0.4433734 | 0.2001303 | 67.80520 | -2.2154242 | 0.0300927 |
|
| sample_typeSputum:treatmentlyPMA | -0.3749646 | 0.2026651 | 63.40668 | -1.8501686 | 0.0689497 | |
| sample_typeNasal swab:treatmentBenzonase | -0.0224270 | 0.1922834 | 68.59286 | -0.1166351 | 0.9074900 | |
| sample_typeSputum:treatmentBenzonase | -0.1906563 | 0.1979198 | 64.84986 | -0.9633004 | 0.3389752 | |
| sample_typeNasal swab:treatmentHost zero | -0.3133525 | 0.1877081 | 66.20325 | -1.6693600 | 0.0997686 | |
| sample_typeSputum:treatmentHost zero | -0.4532172 | 0.2003127 | 63.25108 | -2.2625485 | 0.0271066 |
|
| sample_typeNasal swab:treatmentMolysis | -0.5486404 | 0.1942378 | 69.41775 | -2.8245812 | 0.0061753 |
|
| sample_typeSputum:treatmentMolysis | -0.5345178 | 0.2025954 | 63.08079 | -2.6383508 | 0.0104810 |
|
| sample_typeNasal swab:treatmentQIAamp | -0.1871327 | 0.1873220 | 65.70759 | -0.9989895 | 0.3214646 | |
| sample_typeSputum:treatmentQIAamp | -0.3129724 | 0.1982404 | 63.88243 | -1.5787516 | 0.1193324 |
cat("ANOVA for \n Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## Inverse Simpson ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_invsimpson %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.0267838 | 0.0133919 | 2 | 6.835601 | 0.3224571 | 0.7348127 | |
| treatment | 0.1762910 | 0.0352582 | 5 | 65.324395 | 0.8489653 | 0.5201930 | |
| log10(Final_reads) | 0.3696492 | 0.3696492 | 1 | 70.881743 | 8.9006066 | 0.0039077 |
|
| sample_type:treatment | 0.7115045 | 0.0711505 | 10 | 64.116101 | 1.7131977 | 0.0968588 |
cat("\n 0.098 p-value for the interaction term. May include the interaction term. \n")
##
## 0.098 p-value for the interaction term. May include the interaction term.
#Inverse Simpson - stratified
cat("Inverse Simpson at NS\n\n")
## Inverse Simpson at NS
cat("Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.4341612 | 0.3741168 | 28 | 6.5064209 | 0.0000005 |
|
| treatmentlyPMA | -0.0792832 | 0.0931184 | 28 | -0.8514237 | 0.4017574 | |
| treatmentBenzonase | 0.0112922 | 0.0883655 | 28 | 0.1277900 | 0.8992286 | |
| treatmentHost zero | -0.1324711 | 0.0999615 | 28 | -1.3252214 | 0.1958106 | |
| treatmentMolysis | -0.2120235 | 0.0888356 | 28 | -2.3866962 | 0.0239973 |
|
| treatmentQIAamp | -0.1955251 | 0.1072813 | 28 | -1.8225468 | 0.0790685 | |
| log10(Final_reads) | -0.1054975 | 0.0568729 | 28 | -1.8549701 | 0.0741586 |
cat("Inverse Simpson at BAL\n\n")
## Inverse Simpson at BAL
cat("Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 2.7079552 | 0.7370091 | 18.45226 | 3.6742494 | 0.0016771 |
|
| treatmentlyPMA | 0.3560120 | 0.2545271 | 18.46585 | 1.3987194 | 0.1784686 | |
| treatmentBenzonase | 0.0769610 | 0.2668231 | 19.99899 | 0.2884345 | 0.7759824 | |
| treatmentHost zero | 0.2671909 | 0.2760392 | 19.90623 | 0.9679453 | 0.3446858 | |
| treatmentMolysis | 0.3937560 | 0.2821570 | 19.78512 | 1.3955212 | 0.1783255 | |
| treatmentQIAamp | 0.0956828 | 0.2830622 | 19.76444 | 0.3380276 | 0.7389065 | |
| log10(Final_reads) | -0.1967495 | 0.1328208 | 16.24623 | -1.4813153 | 0.1576569 |
cat("Inverse Simpson at sputum\n\n")
## Inverse Simpson at sputum
cat("Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## Inverse Simpson ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(data_invsimpson ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 4.2511748 | 0.4286288 | 20.78282 | 9.9180809 | 0.0000000 |
|
| treatmentlyPMA | 0.0879014 | 0.0615090 | 19.54384 | 1.4290812 | 0.1687705 | |
| treatmentBenzonase | 0.0482351 | 0.0777588 | 19.91984 | 0.6203172 | 0.5420814 | |
| treatmentHost zero | 0.1656569 | 0.1310623 | 20.31362 | 1.2639552 | 0.2205584 | |
| treatmentMolysis | 0.2823279 | 0.1528856 | 20.36901 | 1.8466618 | 0.0793776 | |
| treatmentQIAamp | 0.0699277 | 0.1137064 | 20.24448 | 0.6149849 | 0.5454161 | |
| log10(Final_reads) | -0.3942383 | 0.0730735 | 20.52069 | -5.3950901 | 0.0000256 |
|
Function BPI
#BPI
cat("BPI of all samples\n\n")
## BPI of all samples
cat("BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_dbp <- lmer(dbp ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id), data = sample_data)
lmer_dbp %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.3494454 | 0.1248399 | 72.85764 | 2.7991472 | 0.0065536 |
|
| sample_typeNasal swab | -0.0949470 | 0.0634800 | 38.59201 | -1.4957002 | 0.1428654 | |
| sample_typeSputum | -0.1489322 | 0.0631257 | 67.11730 | -2.3592951 | 0.0212271 |
|
| treatmentlyPMA | -0.1354895 | 0.0643160 | 67.14843 | -2.1066210 | 0.0388903 |
|
| treatmentBenzonase | -0.0126454 | 0.0639374 | 71.58151 | -0.1977775 | 0.8437797 | |
| treatmentHost zero | -0.0763098 | 0.0649149 | 72.08571 | -1.1755356 | 0.2436485 | |
| treatmentMolysis | -0.1501657 | 0.0655753 | 72.34056 | -2.2899747 | 0.0249412 |
|
| treatmentQIAamp | -0.0134435 | 0.0656737 | 72.37364 | -0.2047007 | 0.8383806 | |
| log10(Final_reads) | 0.0690874 | 0.0213231 | 69.37468 | 3.2400310 | 0.0018365 |
|
| sample_typeNasal swab:treatmentlyPMA | 0.1885595 | 0.0810249 | 68.19355 | 2.3271804 | 0.0229322 |
|
| sample_typeSputum:treatmentlyPMA | 0.1784163 | 0.0822236 | 63.57164 | 2.1698910 | 0.0337594 |
|
| sample_typeNasal swab:treatmentBenzonase | 0.0146779 | 0.0778149 | 68.96328 | 0.1886253 | 0.8509405 | |
| sample_typeSputum:treatmentBenzonase | 0.0964409 | 0.0802472 | 65.06581 | 1.2017975 | 0.2337980 | |
| sample_typeNasal swab:treatmentHost zero | 0.1083404 | 0.0760586 | 66.47624 | 1.4244332 | 0.1589988 | |
| sample_typeSputum:treatmentHost zero | 0.1771872 | 0.0812745 | 63.42192 | 2.1801082 | 0.0329681 |
|
| sample_typeNasal swab:treatmentMolysis | 0.2291696 | 0.0785680 | 69.77276 | 2.9168336 | 0.0047530 |
|
| sample_typeSputum:treatmentMolysis | 0.2317119 | 0.0822065 | 63.25919 | 2.8186574 | 0.0064296 |
|
| sample_typeNasal swab:treatmentQIAamp | 0.0571663 | 0.0759204 | 65.94314 | 0.7529771 | 0.4541435 | |
| sample_typeSputum:treatmentQIAamp | 0.1292876 | 0.0804119 | 64.06419 | 1.6078168 | 0.1127930 |
cat("ANOVA for \n BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)\n\n")
## ANOVA for
## BPI ~ sample_type * treatment + log10 (Final_reads) + (1|original_sample)
lmer_dbp %>%
anova() %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) | ||
|---|---|---|---|---|---|---|---|
| sample_type | 0.0031047 | 0.0015524 | 2 | 5.797922 | 0.2268168 | 0.8038192 | |
| treatment | 0.0257612 | 0.0051522 | 5 | 65.443949 | 0.7527933 | 0.5870575 | |
| log10(Final_reads) | 0.0718488 | 0.0718488 | 1 | 69.374684 | 10.4978010 | 0.0018365 |
|
| sample_type:treatment | 0.1252121 | 0.0125212 | 10 | 64.288564 | 1.8294710 | 0.0729245 |
cat("\n 0.07 p-value for the interaction term. May include the interaction term. \n")
##
## 0.07 p-value for the interaction term. May include the interaction term.
#Inverse Simpson - stratified
cat("BPI at NS\n\n")
## BPI at NS
cat("BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|subject_id), data = subset(sample_data, sample_data$sample_type == "Nasal swab")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.4017357 | 0.1498449 | 28 | 2.6810102 | 0.0121638 |
|
| treatmentlyPMA | 0.0412304 | 0.0372967 | 28 | 1.1054691 | 0.2783650 | |
| treatmentBenzonase | 0.0046045 | 0.0353930 | 28 | 0.1300974 | 0.8974195 | |
| treatmentHost zero | 0.0504577 | 0.0400376 | 28 | 1.2602602 | 0.2179777 | |
| treatmentMolysis | 0.0834844 | 0.0355813 | 28 | 2.3463015 | 0.0262687 |
|
| treatmentQIAamp | 0.0677258 | 0.0429693 | 28 | 1.5761426 | 0.1262256 | |
| log10(Final_reads) | 0.0468284 | 0.0227793 | 28 | 2.0557472 | 0.0492388 |
|
cat("BPI at BAL\n\n")
## BPI at BAL
cat("BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "BAL")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | 0.2445672 | 0.2956552 | 18.00596 | 0.8272041 | 0.4189496 | |
| treatmentlyPMA | -0.1456413 | 0.1032851 | 18.57372 | -1.4100912 | 0.1750396 | |
| treatmentBenzonase | -0.0313798 | 0.1076447 | 19.98145 | -0.2915125 | 0.7736642 | |
| treatmentHost zero | -0.0977753 | 0.1112261 | 19.81800 | -0.8790675 | 0.3898992 | |
| treatmentMolysis | -0.1733099 | 0.1136103 | 19.64832 | -1.5254772 | 0.1430763 | |
| treatmentQIAamp | -0.0368287 | 0.1139635 | 19.62045 | -0.3231626 | 0.7499909 | |
| log10(Final_reads) | 0.0886954 | 0.0530745 | 15.42411 | 1.6711477 | 0.1148535 |
cat("BPI at sputum\n\n")
## BPI at sputum
cat("BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)\n\n")
## BPI ~ sample_type + log10 (Final_reads) + (1|original_sample)
lmer(dbp ~ treatment + log10 (Final_reads) + (1|original_sample), data = subset(sample_data, sample_data$sample_type == "Sputum")) %>%
summary() %>%
.$coefficients %>%
data.frame(check.names = F) %>%
mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Estimate | Std. Error | df | t value | Pr(>|t|) | ||
|---|---|---|---|---|---|---|
| (Intercept) | -0.2701952 | 0.1781304 | 20.96674 | -1.5168392 | 0.1442409 | |
| treatmentlyPMA | -0.0007038 | 0.0256334 | 19.62396 | -0.0274558 | 0.9783735 | |
| treatmentBenzonase | 0.0154609 | 0.0323782 | 20.05945 | 0.4775100 | 0.6381608 | |
| treatmentHost zero | -0.0343045 | 0.0545240 | 20.51301 | -0.6291638 | 0.5361864 | |
| treatmentMolysis | -0.0792267 | 0.0635945 | 20.57650 | -1.2458118 | 0.2268328 | |
| treatmentQIAamp | 0.0014680 | 0.0473113 | 20.43364 | 0.0310278 | 0.9755484 | |
| log10(Final_reads) | 0.1498736 | 0.0303847 | 20.74986 | 4.9325289 | 0.0000726 |
|
Function beta
A8. permanova of function beta diversity
--> both stratified and nonstratified
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_path_rpkm, S.obs != 0 & sample_type %in% c("BAL", "Nasal swab", "Sputum"))
bray_perm_uni <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + treatment + subject_id,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp + subject_id,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_strata <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type + log10(Final_reads) + lypma + benzonase + host_zero + molysis + qiaamp,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F),
strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F) %>% .$subject_id, permutations = 10000)
bray_perm_inter <- vegan::adonis2(distance(phyloseq_rel_nz, method="bray") ~ sample_type * treatment + log10(Final_reads) + subject_id,
data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_ns <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads) + subject_id,
data = subset_samples(phyloseq_rel_nz, sample_type == "Nasal swab") %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_bal <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "BAL"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads) + subject_id,
data = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
bray_perm_spt <- vegan::adonis2(distance(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"), method="bray") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads) + subject_id,
data = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>% sample_data %>% data.frame(check.names = F), permutations = 10000)
Results
Treatment 1/0
cat("\nUnivariate analysis\n")
##
## Univariate analysis
bray_perm_uni %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 1.557 | 0.344 | 53.603 | 0.00 |
|
| log10(Final reads) | 1 | 1.024 | 0.226 | 70.491 | 0.00 |
|
| Treatment | 5 | 0.124 | 0.027 | 1.703 | 0.14 | |
| Subject | 10 | 0.766 | 0.169 | 5.276 | 0.00 |
|
| Residual | 73 | 1.060 | 0.234 | NA | NA | |
| Total | 91 | 4.530 | 1.000 | NA | NA |
Detailed treatment
cat("\nDetailed treatment\n")
##
## Detailed treatment
bray_perm %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 1.557 | 0.344 | 53.603 | 0.000 |
|
| log10(Final reads) | 1 | 1.024 | 0.226 | 70.491 | 0.000 |
|
| lyPMA | 1 | 0.019 | 0.004 | 1.284 | 0.263 | |
| Benzonase | 1 | 0.008 | 0.002 | 0.579 | 0.459 | |
| Host zero | 1 | 0.008 | 0.002 | 0.564 | 0.460 | |
| Molysis | 1 | 0.087 | 0.019 | 6.020 | 0.015 |
|
| QIAamp | 1 | 0.001 | 0.000 | 0.067 | 0.836 | |
| Subject | 10 | 0.766 | 0.169 | 5.276 | 0.000 |
|
| Residual | 73 | 1.060 | 0.234 | NA | NA | |
| Total | 91 | 4.530 | 1.000 | NA | NA |
Strata term
cat("\n Strata -detailed treatment\n")
##
## Strata -detailed treatment
bray_perm_strata %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 1.557 | 0.344 | 35.378 | 0.000 |
|
| log10(Final reads) | 1 | 1.024 | 0.226 | 46.525 | 0.000 |
|
| lyPMA | 1 | 0.019 | 0.004 | 0.847 | 0.424 | |
| Benzonase | 1 | 0.008 | 0.002 | 0.382 | 0.521 | |
| Host zero | 1 | 0.008 | 0.002 | 0.372 | 0.570 | |
| Molysis | 1 | 0.087 | 0.019 | 3.973 | 0.054 | |
| QIAamp | 1 | 0.001 | 0.000 | 0.044 | 0.964 | |
| Residual | 83 | 1.826 | 0.403 | NA | NA | |
| Total | 91 | 4.530 | 1.000 | NA | NA |
Interaction term
cat("\n Interaction term \n")
##
## Interaction term
bray_perm_inter %>% data.frame(check.names = F) %>% rownames_to_column("row.names") %>%
mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
row.names == "treatment" ~ 'Treatment',
row.names == "subject_id" ~ 'Subject',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "sample_type:treatment" ~ 'Sample type X treatment',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| Sample type | 2 | 1.557 | 0.344 | 66.198 | 0.000 |
|
| Treatment | 5 | 0.684 | 0.151 | 11.631 | 0.000 |
|
| log10(Final reads) | 1 | 0.463 | 0.102 | 39.412 | 0.000 |
|
| Subject | 10 | 0.766 | 0.169 | 6.515 | 0.000 |
|
| Sample type X treatment | 10 | 0.319 | 0.070 | 2.715 | 0.008 |
|
| Residual | 63 | 0.741 | 0.164 | NA | NA | |
| Total | 91 | 4.530 | 1.000 | NA | NA |
Stratified
cat("\n Stratified - nasal swab \n")
##
## Stratified - nasal swab
bray_perm_ns %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.011 | 0.042 | 2.049 | 0.147 | |
| Benzonase | 1 | 0.011 | 0.040 | 1.940 | 0.159 | |
| Host zero | 1 | 0.009 | 0.033 | 1.615 | 0.205 | |
| Molysis | 1 | 0.014 | 0.052 | 2.566 | 0.104 | |
| QIAamp | 1 | 0.054 | 0.197 | 9.648 | 0.003 |
|
| log10(Final reads) | 1 | 0.028 | 0.103 | 5.051 | 0.027 |
|
| Subject id | 2 | 0.001 | 0.003 | 0.083 | 0.980 | |
| Residual | 26 | 0.146 | 0.530 | NA | NA | |
| Total | 34 | 0.275 | 1.000 | NA | NA |
cat("\n Stratified - BAL \n")
##
## Stratified - BAL
bray_perm_bal %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.040 | 0.020 | 1.305 | 0.268 | |
| Benzonase | 1 | 0.115 | 0.058 | 3.716 | 0.070 | |
| Host zero | 1 | 0.027 | 0.014 | 0.886 | 0.370 | |
| Molysis | 1 | 0.180 | 0.091 | 5.820 | 0.028 |
|
| QIAamp | 1 | 0.001 | 0.000 | 0.029 | 0.904 | |
| log10(Final reads) | 1 | 0.603 | 0.306 | 19.535 | 0.001 |
|
| Subject id | 4 | 0.513 | 0.260 | 4.153 | 0.017 |
|
| Residual | 16 | 0.494 | 0.250 | NA | NA | |
| Total | 26 | 1.973 | 1.000 | NA | NA |
cat("\n Stratified - sputum \n")
##
## Stratified - sputum
bray_perm_spt %>% data.frame(check.names = F) %>% rownames_to_column('row.names') %>%
mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
row.names == "benzonase" ~ 'Benzonase',
row.names == "host_zero" ~ 'Host zero',
row.names == "molysis" ~ 'Molysis',
row.names == "qiaamp" ~ 'QIAamp',
row.names == "subject_id" ~ 'Subject id',
row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
row.names == "Residual" ~ 'Residual',
row.names == "Total" ~ 'Total')) %>% column_to_rownames('row.names') %>%
round(3) %>% mutate(` ` = case_when(abs(`Pr(>F)`) < 0.05 ~ "*",
.default = " ")) %>%
kbl(format = "html") %>%
kable_styling(full_width = 0, html_font = "serif")
| Df | SumOfSqs | R2 | F | Pr(>F) | ||
|---|---|---|---|---|---|---|
| lyPMA | 1 | 0.017 | 0.023 | 3.920 | 0.053 | |
| Benzonase | 1 | 0.002 | 0.003 | 0.440 | 0.557 | |
| Host zero | 1 | 0.064 | 0.089 | 15.072 | 0.001 |
|
| Molysis | 1 | 0.139 | 0.191 | 32.510 | 0.000 |
|
| QIAamp | 1 | 0.370 | 0.509 | 86.595 | 0.000 |
|
| log10(Final reads) | 1 | 0.032 | 0.045 | 7.588 | 0.011 |
|
| Subject id | 4 | 0.021 | 0.029 | 1.224 | 0.332 | |
| Residual | 19 | 0.081 | 0.112 | NA | NA | |
| Total | 29 | 0.726 | 1.000 | NA | NA |
A9. DA analysis for function, by sample type and treatment
--> both stratified and nonstratified
--> multiple levels?
No normalization will be conducted for RPKM data.
Thread: https://forum.biobakery.org/t/maaslin-with-shortbred-results-and-panphlan/3102/2
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_path_rpkm, S.obs != 0 & sample_type %in% c("BAL", "Nasal swab", "Sputum"))
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(sample_data(phyloseq_rel_nz)$sample_type, sample_data(phyloseq_rel_nz)$treatment, sep = ":")
taxa_names(phyloseq_rel_nz) <- tax_table(phyloseq_rel_nz) %>% data.frame() %>% .$group
#Running MaAslin for all sample without decontam
#for taxa differentially abundant by host depletion method, look to see which ones overlap with potential contaminant taxa
#Checking number of bugs for the main text of the manuscript
cat("Number of differentially abundant bugs by each metadata")
## Number of differentially abundant bugs by each metadata
maaslin_all2 %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 100 125 162 90
## molysis qiaamp sample_type
## 185 66 288
cat("Decreased bugs by each metadata")
## Decreased bugs by each metadata
maaslin_all2 %>% subset(., .$qval < 0.1 & .$coef < 0) %>% .$metadata %>% table()
## .
## host_zero log10.Final_reads lypma molysis
## 1 31 2 2
## qiaamp sample_type
## 2 6
cat("Incrased")
## Incrased
maaslin_all2 %>% subset(., .$qval < 0.1 & .$coef > 0) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 100 124 131 88
## molysis qiaamp sample_type
## 183 64 282
cat("Number of differentially abundant bugs by each metadata in NS")
## Number of differentially abundant bugs by each metadata in NS
fit_data_ns2 %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 6 64 67 55
## molysis qiaamp
## 64 20
cat("Number of differentially abundant bugs by each metadata in BAL")
## Number of differentially abundant bugs by each metadata in BAL
fit_data_bal2 %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 3 12 103 10
## molysis qiaamp
## 14 4
cat("Number of differentially abundant bugs by each metadata in sputum")
## Number of differentially abundant bugs by each metadata in sputum
fit_data_spt2 %>% subset(., .$qval < 0.1) %>% .$metadata %>% table()
## .
## benzonase host_zero log10.Final_reads lypma
## 142 108 23 140
## molysis qiaamp
## 111 114
cat("\nAdding control data to both all and stratified MaAslin will help identify the actual contaminant\n")
##
## Adding control data to both all and stratified MaAslin will help identify the actual contaminant
#volcano plot of MaAslin with all data
ggplot(maaslin_all2, aes(y = -log10(qval), x = coef, col = metadata)) +
theme_classic(base_family = "serif") +
#labs(tag = "A") +
ggtitle("MaAslin with treatment types")+
geom_point(size = 2) +
xlab("MaAslin coefficient") +
ylab("-log<sub>10</sub>(*q*-value)") +
geom_hline(yintercept = 1, col = "gray") +
geom_vline(xintercept = 0, col = "gray") +
geom_richtext(aes( 4, 8, label = "*q*-value = 0.1, fold-change = 0", vjust = -1, fontface = 1), col = "grey", size = 3, family = "serif") +
theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
scale_color_manual(values = c("#4daf4a", "#984ea3", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628")) +
guides(col = guide_legend(title = "Fixed effects", title.position = "top", nrow = 3))
# Make a significance table for each figure (top 20 taxa)
make_sig_table <- function(data) {
sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
sig_data$min <- apply(sig_data, 1, FUN = min)
sig_data <- sig_data[order(sig_data$min),] %>% .[1:20,]
sig_data[["feature"]] <- ifelse(sig_data[["feature"]] == "X.Collinsella._massiliensis", "[Collinsella]_massiliensis", sig_data[["feature"]])
# sig_data_italic <- sig_data %>% rownames_to_column(var = "-") %>% column_to_rownames(var = "feature") %>% species_italic %>% select(-c("-", "min", "log10.Final_reads"))
sig_data_sig <- ifelse(sig_data < 0.1, "*", NA)
return(list(data = sig_data, data_sig = sig_data_sig))
}
fit_data_bal2 <- make_sig_table(fit_data_bal2)
fit_data_ns2 <- make_sig_table(fit_data_ns2)
fit_data_spt2 <- make_sig_table(fit_data_spt2)
cat("\n\nStratified Masslin - significant of each but by treatment in BAL\n")
##
##
## Stratified Masslin - significant of each but by treatment in BAL
fit_data_bal2$data_sig
## feature benzonase host_zero log10.Final_reads lypma molysis qiaamp min
## 263 NA NA NA "*" NA NA NA "*"
## 276 NA NA NA "*" NA NA NA "*"
## 277 NA NA NA "*" NA NA NA "*"
## 301 NA NA NA "*" NA NA NA "*"
## 295 NA NA NA "*" NA NA NA "*"
## 264 NA NA NA "*" NA NA NA "*"
## 255 NA NA NA "*" NA NA NA "*"
## 83 NA NA NA "*" NA NA NA "*"
## 184 NA NA NA "*" NA NA NA "*"
## 76 NA NA NA "*" NA NA NA "*"
## 95 NA "*" "*" NA NA "*" NA "*"
## 81 NA NA NA "*" NA NA NA "*"
## 256 NA NA NA "*" NA NA NA "*"
## 132 NA NA NA "*" NA "*" NA "*"
## 7 NA NA NA "*" NA NA NA "*"
## 40 NA NA NA "*" NA NA NA "*"
## 41 NA NA NA "*" NA NA NA "*"
## 319 NA NA NA "*" NA NA NA "*"
## 210 NA NA NA "*" NA NA NA "*"
## 234 NA NA NA "*" NA NA NA "*"
cat("\n\nStratified Masslin - significant of each but by treatment in nasal swab\n")
##
##
## Stratified Masslin - significant of each but by treatment in nasal swab
fit_data_ns2$data_sig
## feature benzonase host_zero log10.Final_reads lypma molysis qiaamp min
## 135 NA "*" "*" "*" "*" "*" "*" "*"
## 173 NA NA NA "*" NA NA NA "*"
## 204 NA NA NA "*" NA NA NA "*"
## 229 NA NA NA NA NA NA "*" "*"
## 233 NA NA NA NA NA NA "*" "*"
## 129 NA "*" "*" "*" "*" "*" "*" "*"
## 67 NA NA "*" "*" "*" "*" NA "*"
## 227 NA "*" "*" "*" "*" "*" "*" "*"
## 71 NA NA NA "*" NA NA NA "*"
## 239 NA NA NA "*" NA NA NA "*"
## 37 NA "*" "*" "*" "*" "*" "*" "*"
## 44 NA NA "*" "*" "*" "*" "*" "*"
## 43 NA NA "*" "*" "*" "*" "*" "*"
## 174 NA NA NA "*" NA NA NA "*"
## 127 NA NA NA "*" NA NA NA "*"
## 185 NA NA "*" "*" "*" "*" "*" "*"
## 143 NA NA NA "*" NA NA NA "*"
## 13 NA "*" "*" "*" "*" "*" "*" "*"
## 100 NA "*" "*" NA "*" "*" "*" "*"
## 116 NA NA "*" NA "*" "*" NA "*"
cat("\n\nStratified Masslin - significant of each but by treatment in Sputum\n")
##
##
## Stratified Masslin - significant of each but by treatment in Sputum
fit_data_spt2$data_sig
## feature benzonase host_zero log10.Final_reads lypma molysis qiaamp min
## 228 NA "*" "*" NA "*" "*" "*" "*"
## 330 NA "*" "*" NA "*" "*" "*" "*"
## 26 NA "*" "*" NA "*" "*" "*" "*"
## 36 NA "*" "*" NA "*" "*" "*" "*"
## 375 NA "*" "*" NA "*" "*" "*" "*"
## 273 NA "*" "*" NA "*" "*" "*" "*"
## 372 NA "*" "*" NA "*" "*" "*" "*"
## 138 NA "*" "*" NA "*" "*" "*" "*"
## 159 NA "*" "*" NA "*" "*" "*" "*"
## 195 NA "*" "*" NA "*" "*" "*" "*"
## 19 NA "*" "*" NA "*" "*" "*" "*"
## 261 NA "*" "*" NA "*" "*" "*" "*"
## 307 NA "*" "*" NA "*" "*" "*" "*"
## 54 NA "*" "*" NA "*" "*" "*" "*"
## 374 NA "*" "*" NA "*" "*" "*" "*"
## 20 NA "*" "*" NA "*" "*" "*" "*"
## 101 NA "*" "*" NA "*" "*" "*" "*"
## 218 NA "*" "*" NA "*" "*" "*" "*"
## 253 NA "*" "*" NA "*" "*" "*" "*"
## 337 NA "*" "*" NA "*" "*" "*" "*"
Done.
#===============================================================================
#BTC.LineZero.Footer.1.1.0
#===============================================================================
#R markdown citation generator.
#===============================================================================
#RLB.Dependencies:
# magrittr, pacman, stringr
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#BTC.Dependencies:
# LineZero.Header
#===============================================================================
#Generates citations for each explicitly loaded library.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
str_libraries <- c("r", str_libraries)
for (str_libraries in str_libraries) {
str_libraries |>
pacman::p_citation() |>
print(bibtex = FALSE) |>
capture.output() %>%
.[-1:-3] %>% .[. != ""] |>
stringr::str_squish() |>
stringr::str_replace("_", "") |>
cat()
cat("\n")
}
## R Core Team (2022). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/. We have invested a lot of time and effort in creating R, please cite it when using it for data analysis. See also 'citation("pkgname")' for citing R packages.
## Wickham H, Bryan J (2023). readxl: Read Excel Files_. R package version 1.4.2, <https://CRAN.R-project.org/package=readxl>.
## phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data. Paul J. McMurdie and Susan Holmes (2013) PLoS ONE 8(4):e61217.
## Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." Journal of Open Source Software_, *4*(43), 1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## Rinker, T. W. & Kurkiewicz, D. (2017). pacman: Package Management for R. version 0.5.0. Buffalo, New York. http://github.com/trinker/pacman
## Garbett SP, Stephens J, Simonov K, Xie Y, Dong Z, Wickham H, Horner J, reikoch, Beasley W, O'Connor B, Warnes GR, Quinn M, Kamvar ZN (2023). yaml: Methods to Convert R Data to YAML and Back_. R package version 2.3.7, <https://CRAN.R-project.org/package=yaml>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.
## Oksanen J, Simpson G, Blanchet F, Kindt R, Legendre P, Minchin P, O'Hara R, Solymos P, Stevens M, Szoecs E, Wagner H, Barbour M, Bedward M, Bolker B, Borcard D, Carvalho G, Chirico M, De Caceres M, Durand S, Evangelista H, FitzJohn R, Friendly M, Furneaux B, Hannigan G, Hill M, Lahti L, McGlinn D, Ouellette M, Ribeiro Cunha E, Smith T, Stier A, Ter Braak C, Weedon J (2022). vegan: Community Ecology Package. R package version 2.6-4, <https://CRAN.R-project.org/package=vegan>.
## Leo Lahti et al. microbiome R package. URL: http://microbiome.github.io
## Kassambara A (2023). ggpubr: 'ggplot2' Based Publication Ready Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Simon Garnier, Noam Ross, Robert Rudis, Antônio P. Camargo, Marco Sciaini, and Cédric Scherer (2021). Rvision - Colorblind-Friendly Color Maps for R. R package version 0.6.2.
## Davis NM, Proctor D, Holmes SP, Relman DA, Callahan BJ (2017). "Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data." bioRxiv_, 221499. doi:10.1101/221499 <https://doi.org/10.1101/221499>.
## Auguie B (2017). gridExtra: Miscellaneous Functions for "Grid" Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## Kassambara A (2023). ggpubr: 'ggplot2' Based Publication Ready Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker (2015). Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
## Kuznetsova A, Brockhoff PB, Christensen RHB (2017). "lmerTest Package: Tests in Linear Mixed Effects Models." Journal of Statistical Software, *82*(13), 1-26. doi:10.18637/jss.v082.i13 <https://doi.org/10.18637/jss.v082.i13>.
## Ooms J (2023). writexl: Export Data Frames to Excel 'xlsx' Format_. R package version 1.4.2, <https://CRAN.R-project.org/package=writexl>.
## Gonçalves da Silva A (2017). harrietr: Wrangle Phylogenetic Distance Matrices and Other Utilities. R package version 0.2.3, <https://CRAN.R-project.org/package=harrietr>.
## Mallick H et al. (2020). Multivariable Association in Population-scale Meta-omics Studies, http://huttenhower.sph.harvard.edu/maaslin2. To cite the MaAsLin 2 software, please use: Mallick H, Rahnavard A, McIver LJ (2020). MaAsLin 2: Multivariable Association in Population-scale Meta-omics Studies. R/Bioconductor package, http://huttenhower.sph.harvard.edu/maaslin2.
## Wilke C, Wiernik B (2022). ggtext: Improved Text Rendering Support for 'ggplot2'. R package version 0.1.2, <https://CRAN.R-project.org/package=ggtext>.
## Aphalo P (2022). ggpmisc: Miscellaneous Extensions to 'ggplot2'_. R package version 0.5.2, <https://CRAN.R-project.org/package=ggpmisc>.
## Auguie B (2017). gridExtra: Miscellaneous Functions for "Grid" Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## Wood S, Scheipl F (2020). gamm4: Generalized Additive Mixed Models using 'mgcv' and 'lme4'. R package version 0.2-6, <https://CRAN.R-project.org/package=gamm4>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.
## Zhu H (2021). kableExtra: Construct Complex Table with 'kable' and Pipe Syntax. R package version 1.3.4, <https://CRAN.R-project.org/package=kableExtra>.
## Yihui Xie (2023). knitr: A General-Purpose Package for Dynamic Report Generation in R. R package version 1.42. Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition. Chapman and Hall/CRC. ISBN 978-1498716963 Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible Research in R. In Victoria Stodden, Friedrich Leisch and Roger D. Peng, editors, Implementing Reproducible Computational Research. Chapman and Hall/CRC. ISBN 978-1466561595
#===============================================================================